Пример #1
0
void av1_decoder_remove(AV1Decoder *pbi) {
  int i;

  if (!pbi) return;

  aom_get_worker_interface()->end(&pbi->lf_worker);
  aom_free(pbi->lf_worker.data1);
  aom_free(pbi->tile_data);
  for (i = 0; i < pbi->num_tile_workers; ++i) {
    AVxWorker *const worker = &pbi->tile_workers[i];
    aom_get_worker_interface()->end(worker);
  }
  aom_free(pbi->tile_worker_data);
  aom_free(pbi->tile_worker_info);
  aom_free(pbi->tile_workers);

  if (pbi->num_tile_workers > 0) {
    av1_loop_filter_dealloc(&pbi->lf_row_sync);
  }

#if CONFIG_ACCOUNTING
  aom_accounting_clear(&pbi->accounting);
#endif

  aom_free(pbi);
}
Пример #2
0
void av1_encode_tiles_mt(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int tile_cols = cm->tile_cols;
  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
  const int num_workers = AOMMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  av1_init_tile_data(cpi);

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    CHECK_MEM_ERROR(cm, cpi->workers,
                    aom_malloc(num_workers * sizeof(*cpi->workers)));

    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    aom_calloc(num_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < num_workers; i++) {
      AVxWorker *const worker = &cpi->workers[i];
      EncWorkerData *const thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      thread_data->cpi = cpi;

      if (i < num_workers - 1) {
        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        aom_memalign(32, sizeof(*thread_data->td)));
        av1_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        av1_setup_pc_tree(cm, thread_data->td);

        // Set up variance tree if needed.
        if (cpi->sf.partition_search_type == VAR_BASED_PARTITION)
          av1_setup_var_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        aom_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads
        if (!winterface->reset(worker))
          aom_internal_error(&cm->error, AOM_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->td = &cpi->td;
      }

      winterface->sync(worker);
    }
  }

  for (i = 0; i < num_workers; i++) {
    AVxWorker *const worker = &cpi->workers[i];
    EncWorkerData *thread_data;

    worker->hook = (AVxWorkerHook)enc_worker_hook;
    worker->data1 = &cpi->tile_thr_data[i];
    worker->data2 = NULL;
    thread_data = (EncWorkerData *)worker->data1;

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }

#if CONFIG_PALETTE
    // Allocate buffers used by palette coding mode.
    if (cpi->common.allow_screen_content_tools && i < num_workers - 1) {
      MACROBLOCK *x = &thread_data->td->mb;
      CHECK_MEM_ERROR(cm, x->palette_buffer,
                      aom_memalign(16, sizeof(*x->palette_buffer)));
    }
#endif  // CONFIG_PALETTE
  }

  // Encode a frame
  for (i = 0; i < num_workers; i++) {
    AVxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Set the starting tile for each thread.
    thread_data->start = i;

    if (i == cpi->num_workers - 1)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }

  // Encoding ends.
  for (i = 0; i < num_workers; i++) {
    AVxWorker *const worker = &cpi->workers[i];
    winterface->sync(worker);
  }

  for (i = 0; i < num_workers; i++) {
    AVxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Accumulate counters.
    if (i < cpi->num_workers - 1) {
      av1_accumulate_frame_counts(&cm->counts, thread_data->td->counts);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
Пример #3
0
AV1Decoder *av1_decoder_create(BufferPool *const pool) {
  AV1Decoder *volatile const pbi = aom_memalign(32, sizeof(*pbi));
  AV1_COMMON *volatile const cm = pbi ? &pbi->common : NULL;

  if (!cm) return NULL;

  av1_zero(*pbi);

  if (setjmp(cm->error.jmp)) {
    cm->error.setjmp = 0;
    av1_decoder_remove(pbi);
    return NULL;
  }

  cm->error.setjmp = 1;

  CHECK_MEM_ERROR(cm, cm->fc,
                  (FRAME_CONTEXT *)aom_memalign(32, sizeof(*cm->fc)));
  CHECK_MEM_ERROR(cm, cm->frame_contexts,
                  (FRAME_CONTEXT *)aom_memalign(
                      32, FRAME_CONTEXTS * sizeof(*cm->frame_contexts)));
  memset(cm->fc, 0, sizeof(*cm->fc));
  memset(cm->frame_contexts, 0, FRAME_CONTEXTS * sizeof(*cm->frame_contexts));

  pbi->need_resync = 1;
  once(initialize_dec);

  // Initialize the references to not point to any frame buffers.
  memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
  memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map));

  cm->current_video_frame = 0;
  pbi->ready_for_new_data = 1;
  pbi->common.buffer_pool = pool;

  cm->bit_depth = AOM_BITS_8;
  cm->dequant_bit_depth = AOM_BITS_8;

  cm->alloc_mi = av1_dec_alloc_mi;
  cm->free_mi = av1_dec_free_mi;
  cm->setup_mi = av1_dec_setup_mi;

  av1_loop_filter_init(cm);

#if CONFIG_NCOBMC_ADAPT_WEIGHT
  get_default_ncobmc_kernels(cm);
#endif  // CONFIG_NCOBMC_ADAPT_WEIGHT

#if CONFIG_AOM_QM
  aom_qm_init(cm);
#endif
#if CONFIG_LOOP_RESTORATION
  av1_loop_restoration_precal();
#endif  // CONFIG_LOOP_RESTORATION
#if CONFIG_ACCOUNTING
  pbi->acct_enabled = 1;
  aom_accounting_init(&pbi->accounting);
#endif

  cm->error.setjmp = 0;

  aom_get_worker_interface()->init(&pbi->lf_worker);

  return pbi;
}
Пример #4
0
int av1_receive_compressed_data(AV1Decoder *pbi, size_t size,
                                const uint8_t **psource) {
  AV1_COMMON *volatile const cm = &pbi->common;
  BufferPool *volatile const pool = cm->buffer_pool;
  RefCntBuffer *volatile const frame_bufs = cm->buffer_pool->frame_bufs;
  const uint8_t *source = *psource;
  int retcode = 0;
  cm->error.error_code = AOM_CODEC_OK;

  if (size == 0) {
    // This is used to signal that we are missing frames.
    // We do not know if the missing frame(s) was supposed to update
    // any of the reference buffers, but we act conservative and
    // mark only the last buffer as corrupted.
    //
    // TODO(jkoleszar): Error concealment is undefined and non-normative
    // at this point, but if it becomes so, [0] may not always be the correct
    // thing to do here.
    if (cm->frame_refs[0].idx > 0) {
      assert(cm->frame_refs[0].buf != NULL);
      cm->frame_refs[0].buf->corrupted = 1;
    }
  }

  pbi->ready_for_new_data = 0;

  // Find a free buffer for the new frame, releasing the reference previously
  // held.

  // Check if the previous frame was a frame without any references to it.
  // Release frame buffer if not decoding in frame parallel mode.
  if (!cm->frame_parallel_decode && cm->new_fb_idx >= 0 &&
      frame_bufs[cm->new_fb_idx].ref_count == 0)
    pool->release_fb_cb(pool->cb_priv,
                        &frame_bufs[cm->new_fb_idx].raw_frame_buffer);

  // Find a free frame buffer. Return error if can not find any.
  cm->new_fb_idx = get_free_fb(cm);
  if (cm->new_fb_idx == INVALID_IDX) return AOM_CODEC_MEM_ERROR;

  // Assign a MV array to the frame buffer.
  cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];

  pbi->hold_ref_buf = 0;
  if (cm->frame_parallel_decode) {
    AVxWorker *const worker = pbi->frame_worker_owner;
    av1_frameworker_lock_stats(worker);
    frame_bufs[cm->new_fb_idx].frame_worker_owner = worker;
    // Reset decoding progress.
    pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
    pbi->cur_buf->row = -1;
    pbi->cur_buf->col = -1;
    av1_frameworker_unlock_stats(worker);
  } else {
    pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
  }

  if (setjmp(cm->error.jmp)) {
    const AVxWorkerInterface *const winterface = aom_get_worker_interface();
    int i;

    cm->error.setjmp = 0;
    pbi->ready_for_new_data = 1;

    // Synchronize all threads immediately as a subsequent decode call may
    // cause a resize invalidating some allocations.
    winterface->sync(&pbi->lf_worker);
    for (i = 0; i < pbi->num_tile_workers; ++i) {
      winterface->sync(&pbi->tile_workers[i]);
    }

    lock_buffer_pool(pool);
    // Release all the reference buffers if worker thread is holding them.
    if (pbi->hold_ref_buf == 1) {
      int ref_index = 0, mask;
      for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
        const int old_idx = cm->ref_frame_map[ref_index];
        // Current thread releases the holding of reference frame.
        decrease_ref_count(old_idx, frame_bufs, pool);

        // Release the reference frame holding in the reference map for the
        // decoding of the next frame.
        if (mask & 1) decrease_ref_count(old_idx, frame_bufs, pool);
        ++ref_index;
      }

      // Current thread releases the holding of reference frame.
      for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) {
        const int old_idx = cm->ref_frame_map[ref_index];
        decrease_ref_count(old_idx, frame_bufs, pool);
      }
      pbi->hold_ref_buf = 0;
    }
    // Release current frame.
    decrease_ref_count(cm->new_fb_idx, frame_bufs, pool);
    unlock_buffer_pool(pool);

    aom_clear_system_state();
    return -1;
  }
Пример #5
0
static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                                struct macroblockd_plane *planes, int start,
                                int stop, int y_only, AVxWorker *workers,
                                int nworkers, AV1LfSync *lf_sync) {
#if CONFIG_EXT_PARTITION
  printf(
      "STOPPING: This code has not been modified to work with the "
      "extended coding unit size experiment");
  exit(EXIT_FAILURE);
#endif  // CONFIG_EXT_PARTITION

  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
  // Number of superblock rows and cols
  const int sb_rows = mi_rows_aligned_to_sb(cm) >> cm->mib_size_log2;
  // Decoder may allocate more threads than number of tiles based on user's
  // input.
  const int tile_cols = cm->tile_cols;
  const int num_workers = AOMMIN(nworkers, tile_cols);
  int i;

  if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
      num_workers > lf_sync->num_workers) {
    av1_loop_filter_dealloc(lf_sync);
    av1_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
  }

// Set up loopfilter thread data.
// The decoder is capping num_workers because it has been observed that using
// more threads on the loopfilter than there are cores will hurt performance
// on Android. This is because the system will only schedule the tile decode
// workers on cores equal to the number of tile columns. Then if the decoder
// tries to use more threads for the loopfilter, it will hurt performance
// because of contention. If the multithreading code changes in the future
// then the number of workers used by the loopfilter should be revisited.

#if CONFIG_PARALLEL_DEBLOCKING
  // Initialize cur_sb_col to -1 for all SB rows.
  memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);

  // Filter all the vertical edges in the whole frame
  for (i = 0; i < num_workers; ++i) {
    AVxWorker *const worker = &workers[i];
    LFWorkerData *const lf_data = &lf_sync->lfdata[i];

    worker->hook = (AVxWorkerHook)loop_filter_ver_row_worker;
    worker->data1 = lf_sync;
    worker->data2 = lf_data;

    // Loopfilter data
    av1_loop_filter_data_reset(lf_data, frame, cm, planes);
    lf_data->start = start + i * cm->mib_size;
    lf_data->stop = stop;
    lf_data->y_only = y_only;

    // Start loopfiltering
    if (i == num_workers - 1) {
      winterface->execute(worker);
    } else {
      winterface->launch(worker);
    }
  }

  // Wait till all rows are finished
  for (i = 0; i < num_workers; ++i) {
    winterface->sync(&workers[i]);
  }

  memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
  // Filter all the horizontal edges in the whole frame
  for (i = 0; i < num_workers; ++i) {
    AVxWorker *const worker = &workers[i];
    LFWorkerData *const lf_data = &lf_sync->lfdata[i];

    worker->hook = (AVxWorkerHook)loop_filter_hor_row_worker;
    worker->data1 = lf_sync;
    worker->data2 = lf_data;

    // Loopfilter data
    av1_loop_filter_data_reset(lf_data, frame, cm, planes);
    lf_data->start = start + i * cm->mib_size;
    lf_data->stop = stop;
    lf_data->y_only = y_only;

    // Start loopfiltering
    if (i == num_workers - 1) {
      winterface->execute(worker);
    } else {
      winterface->launch(worker);
    }
  }

  // Wait till all rows are finished
  for (i = 0; i < num_workers; ++i) {
    winterface->sync(&workers[i]);
  }
#else   // CONFIG_PARALLEL_DEBLOCKING
  // Initialize cur_sb_col to -1 for all SB rows.
  memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);

  for (i = 0; i < num_workers; ++i) {
    AVxWorker *const worker = &workers[i];
    LFWorkerData *const lf_data = &lf_sync->lfdata[i];

    worker->hook = (AVxWorkerHook)loop_filter_row_worker;
    worker->data1 = lf_sync;
    worker->data2 = lf_data;

    // Loopfilter data
    av1_loop_filter_data_reset(lf_data, frame, cm, planes);
    lf_data->start = start + i * cm->mib_size;
    lf_data->stop = stop;
    lf_data->y_only = y_only;

    // Start loopfiltering
    if (i == num_workers - 1) {
      winterface->execute(worker);
    } else {
      winterface->launch(worker);
    }
  }

  // Wait till all rows are finished
  for (i = 0; i < num_workers; ++i) {
    winterface->sync(&workers[i]);
  }
#endif  // CONFIG_PARALLEL_DEBLOCKING
}