Beispiel #1
0
void *aom_calloc(size_t num, size_t size) {
  void *x;

  x = aom_memalign(DEFAULT_ALIGNMENT, num * size);

  if (x) memset(x, 0, num * size);

  return x;
}
Beispiel #2
0
struct aom_noise_tx_t *aom_noise_tx_malloc(int block_size) {
  struct aom_noise_tx_t *noise_tx =
      (struct aom_noise_tx_t *)aom_malloc(sizeof(struct aom_noise_tx_t));
  if (!noise_tx) return NULL;
  memset(noise_tx, 0, sizeof(*noise_tx));
  switch (block_size) {
    case 2:
      noise_tx->fft = aom_fft2x2_float;
      noise_tx->ifft = aom_ifft2x2_float;
      break;
    case 4:
      noise_tx->fft = aom_fft4x4_float;
      noise_tx->ifft = aom_ifft4x4_float;
      break;
    case 8:
      noise_tx->fft = aom_fft8x8_float;
      noise_tx->ifft = aom_ifft8x8_float;
      break;
    case 16:
      noise_tx->fft = aom_fft16x16_float;
      noise_tx->ifft = aom_ifft16x16_float;
      break;
    case 32:
      noise_tx->fft = aom_fft32x32_float;
      noise_tx->ifft = aom_ifft32x32_float;
      break;
    default:
      aom_free(noise_tx);
      fprintf(stderr, "Unsupported block size %d\n", block_size);
      return NULL;
  }
  noise_tx->block_size = block_size;
  noise_tx->tx_block = (float *)aom_memalign(
      32, 2 * sizeof(*noise_tx->tx_block) * block_size * block_size);
  noise_tx->temp = (float *)aom_memalign(
      32, 2 * sizeof(*noise_tx->temp) * block_size * block_size);
  if (!noise_tx->tx_block || !noise_tx->temp) {
    aom_noise_tx_free(noise_tx);
    return NULL;
  }
  return noise_tx;
}
Beispiel #3
0
void *aom_malloc(size_t size) { return aom_memalign(DEFAULT_ALIGNMENT, size); }
Beispiel #4
0
void av1_encode_tiles_mt(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int tile_cols = cm->tile_cols;
  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
  const int num_workers = AOMMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  av1_init_tile_data(cpi);

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    CHECK_MEM_ERROR(cm, cpi->workers,
                    aom_malloc(num_workers * sizeof(*cpi->workers)));

    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    aom_calloc(num_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < num_workers; i++) {
      AVxWorker *const worker = &cpi->workers[i];
      EncWorkerData *const thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      thread_data->cpi = cpi;

      if (i < num_workers - 1) {
        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        aom_memalign(32, sizeof(*thread_data->td)));
        av1_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        av1_setup_pc_tree(cm, thread_data->td);

        // Set up variance tree if needed.
        if (cpi->sf.partition_search_type == VAR_BASED_PARTITION)
          av1_setup_var_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        aom_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads
        if (!winterface->reset(worker))
          aom_internal_error(&cm->error, AOM_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->td = &cpi->td;
      }

      winterface->sync(worker);
    }
  }

  for (i = 0; i < num_workers; i++) {
    AVxWorker *const worker = &cpi->workers[i];
    EncWorkerData *thread_data;

    worker->hook = (AVxWorkerHook)enc_worker_hook;
    worker->data1 = &cpi->tile_thr_data[i];
    worker->data2 = NULL;
    thread_data = (EncWorkerData *)worker->data1;

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }

#if CONFIG_PALETTE
    // Allocate buffers used by palette coding mode.
    if (cpi->common.allow_screen_content_tools && i < num_workers - 1) {
      MACROBLOCK *x = &thread_data->td->mb;
      CHECK_MEM_ERROR(cm, x->palette_buffer,
                      aom_memalign(16, sizeof(*x->palette_buffer)));
    }
#endif  // CONFIG_PALETTE
  }

  // Encode a frame
  for (i = 0; i < num_workers; i++) {
    AVxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Set the starting tile for each thread.
    thread_data->start = i;

    if (i == cpi->num_workers - 1)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }

  // Encoding ends.
  for (i = 0; i < num_workers; i++) {
    AVxWorker *const worker = &cpi->workers[i];
    winterface->sync(worker);
  }

  for (i = 0; i < num_workers; i++) {
    AVxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Accumulate counters.
    if (i < cpi->num_workers - 1) {
      av1_accumulate_frame_counts(&cm->counts, thread_data->td->counts);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
Beispiel #5
0
AV1Decoder *av1_decoder_create(BufferPool *const pool) {
  AV1Decoder *volatile const pbi = aom_memalign(32, sizeof(*pbi));
  AV1_COMMON *volatile const cm = pbi ? &pbi->common : NULL;

  if (!cm) return NULL;

  av1_zero(*pbi);

  if (setjmp(cm->error.jmp)) {
    cm->error.setjmp = 0;
    av1_decoder_remove(pbi);
    return NULL;
  }

  cm->error.setjmp = 1;

  CHECK_MEM_ERROR(cm, cm->fc,
                  (FRAME_CONTEXT *)aom_memalign(32, sizeof(*cm->fc)));
  CHECK_MEM_ERROR(cm, cm->frame_contexts,
                  (FRAME_CONTEXT *)aom_memalign(
                      32, FRAME_CONTEXTS * sizeof(*cm->frame_contexts)));
  memset(cm->fc, 0, sizeof(*cm->fc));
  memset(cm->frame_contexts, 0, FRAME_CONTEXTS * sizeof(*cm->frame_contexts));

  pbi->need_resync = 1;
  once(initialize_dec);

  // Initialize the references to not point to any frame buffers.
  memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
  memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map));

  cm->current_video_frame = 0;
  pbi->ready_for_new_data = 1;
  pbi->common.buffer_pool = pool;

  cm->bit_depth = AOM_BITS_8;
  cm->dequant_bit_depth = AOM_BITS_8;

  cm->alloc_mi = av1_dec_alloc_mi;
  cm->free_mi = av1_dec_free_mi;
  cm->setup_mi = av1_dec_setup_mi;

  av1_loop_filter_init(cm);

#if CONFIG_NCOBMC_ADAPT_WEIGHT
  get_default_ncobmc_kernels(cm);
#endif  // CONFIG_NCOBMC_ADAPT_WEIGHT

#if CONFIG_AOM_QM
  aom_qm_init(cm);
#endif
#if CONFIG_LOOP_RESTORATION
  av1_loop_restoration_precal();
#endif  // CONFIG_LOOP_RESTORATION
#if CONFIG_ACCOUNTING
  pbi->acct_enabled = 1;
  aom_accounting_init(&pbi->accounting);
#endif

  cm->error.setjmp = 0;

  aom_get_worker_interface()->init(&pbi->lf_worker);

  return pbi;
}
Beispiel #6
0
int aom_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
                             int ss_x, int ss_y,
#if CONFIG_HIGHBITDEPTH
                             int use_highbitdepth,
#endif
                             int border, int byte_alignment,
                             aom_codec_frame_buffer_t *fb,
                             aom_get_frame_buffer_cb_fn_t cb, void *cb_priv) {
  if (ybf) {
    const int aom_byte_align = (byte_alignment == 0) ? 1 : byte_alignment;
    const int aligned_width = (width + 7) & ~7;
    const int aligned_height = (height + 7) & ~7;
    const int y_stride = ((aligned_width + 2 * border) + 31) & ~31;
    const uint64_t yplane_size =
        (aligned_height + 2 * border) * (uint64_t)y_stride + byte_alignment;
    const int uv_width = aligned_width >> ss_x;
    const int uv_height = aligned_height >> ss_y;
    const int uv_stride = y_stride >> ss_x;
    const int uv_border_w = border >> ss_x;
    const int uv_border_h = border >> ss_y;
    const uint64_t uvplane_size =
        (uv_height + 2 * uv_border_h) * (uint64_t)uv_stride + byte_alignment;

#if CONFIG_HIGHBITDEPTH
    const uint64_t frame_size =
        (1 + use_highbitdepth) * (yplane_size + 2 * uvplane_size);
#else
    const uint64_t frame_size = yplane_size + 2 * uvplane_size;
#endif  // CONFIG_HIGHBITDEPTH

    uint8_t *buf = NULL;

    if (cb != NULL) {
      const int align_addr_extra_size = 31;
      const uint64_t external_frame_size = frame_size + align_addr_extra_size;

      assert(fb != NULL);

      if (external_frame_size != (size_t)external_frame_size) return -1;

      // Allocation to hold larger frame, or first allocation.
      if (cb(cb_priv, (size_t)external_frame_size, fb) < 0) return -1;

      if (fb->data == NULL || fb->size < external_frame_size) return -1;

      ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32);

#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
      // This memset is needed for fixing the issue of using uninitialized
      // value in msan test. It will cause a perf loss, so only do this for
      // msan test.
      memset(ybf->buffer_alloc, 0, (int)frame_size);
#endif
#endif
    } else if (frame_size > (size_t)ybf->buffer_alloc_sz) {
      // Allocation to hold larger frame, or first allocation.
      aom_free(ybf->buffer_alloc);
      ybf->buffer_alloc = NULL;

      if (frame_size != (size_t)frame_size) return -1;

      ybf->buffer_alloc = (uint8_t *)aom_memalign(32, (size_t)frame_size);
      if (!ybf->buffer_alloc) return -1;

      ybf->buffer_alloc_sz = (size_t)frame_size;

      // This memset is needed for fixing valgrind error from C loop filter
      // due to access uninitialized memory in frame border. It could be
      // removed if border is totally removed.
      memset(ybf->buffer_alloc, 0, ybf->buffer_alloc_sz);
    }

    /* Only support allocating buffers that have a border that's a multiple
     * of 32. The border restriction is required to get 16-byte alignment of
     * the start of the chroma rows without introducing an arbitrary gap
     * between planes, which would break the semantics of things like
     * aom_img_set_rect(). */
    if (border & 0x1f) return -3;

    ybf->y_crop_width = width;
    ybf->y_crop_height = height;
    ybf->y_width = aligned_width;
    ybf->y_height = aligned_height;
    ybf->y_stride = y_stride;

    ybf->uv_crop_width = (width + ss_x) >> ss_x;
    ybf->uv_crop_height = (height + ss_y) >> ss_y;
    ybf->uv_width = uv_width;
    ybf->uv_height = uv_height;
    ybf->uv_stride = uv_stride;

    ybf->border = border;
    ybf->frame_size = (size_t)frame_size;
    ybf->subsampling_x = ss_x;
    ybf->subsampling_y = ss_y;

    buf = ybf->buffer_alloc;
#if CONFIG_HIGHBITDEPTH
    if (use_highbitdepth) {
      // Store uint16 addresses when using 16bit framebuffers
      buf = CONVERT_TO_BYTEPTR(ybf->buffer_alloc);
      ybf->flags = YV12_FLAG_HIGHBITDEPTH;
    } else {
      ybf->flags = 0;
    }
#endif  // CONFIG_HIGHBITDEPTH

    ybf->y_buffer = (uint8_t *)yv12_align_addr(
        buf + (border * y_stride) + border, aom_byte_align);
    ybf->u_buffer = (uint8_t *)yv12_align_addr(
        buf + yplane_size + (uv_border_h * uv_stride) + uv_border_w,
        aom_byte_align);
    ybf->v_buffer =
        (uint8_t *)yv12_align_addr(buf + yplane_size + uvplane_size +
                                       (uv_border_h * uv_stride) + uv_border_w,
                                   aom_byte_align);

#if CONFIG_HIGHBITDEPTH && CONFIG_GLOBAL_MOTION
    if (ybf->y_buffer_8bit) {
      free(ybf->y_buffer_8bit);
      ybf->y_buffer_8bit = NULL;
    }
#endif

    ybf->corrupted = 0; /* assume not corrupted by errors */
    return 0;
  }
  return -2;
}