// Allocates a zero-filled array of `num` elements of `size` bytes each,
// aligned to DEFAULT_ALIGNMENT.
//
// Returns the new buffer, or NULL when aom_memalign() fails or when
// `num * size` does not fit in a size_t.
void *aom_calloc(size_t num, size_t size) {
  void *x;
  size_t total_size;

  // Refuse requests whose byte count would wrap around: a wrapped product
  // would yield a buffer far smaller than the caller believes it owns.
  if (size != 0 && num > (size_t)-1 / size) return NULL;

  total_size = num * size;
  x = aom_memalign(DEFAULT_ALIGNMENT, total_size);
  if (x) memset(x, 0, total_size);
  return x;
}
struct aom_noise_tx_t *aom_noise_tx_malloc(int block_size) { struct aom_noise_tx_t *noise_tx = (struct aom_noise_tx_t *)aom_malloc(sizeof(struct aom_noise_tx_t)); if (!noise_tx) return NULL; memset(noise_tx, 0, sizeof(*noise_tx)); switch (block_size) { case 2: noise_tx->fft = aom_fft2x2_float; noise_tx->ifft = aom_ifft2x2_float; break; case 4: noise_tx->fft = aom_fft4x4_float; noise_tx->ifft = aom_ifft4x4_float; break; case 8: noise_tx->fft = aom_fft8x8_float; noise_tx->ifft = aom_ifft8x8_float; break; case 16: noise_tx->fft = aom_fft16x16_float; noise_tx->ifft = aom_ifft16x16_float; break; case 32: noise_tx->fft = aom_fft32x32_float; noise_tx->ifft = aom_ifft32x32_float; break; default: aom_free(noise_tx); fprintf(stderr, "Unsupported block size %d\n", block_size); return NULL; } noise_tx->block_size = block_size; noise_tx->tx_block = (float *)aom_memalign( 32, 2 * sizeof(*noise_tx->tx_block) * block_size * block_size); noise_tx->temp = (float *)aom_memalign( 32, 2 * sizeof(*noise_tx->temp) * block_size * block_size); if (!noise_tx->tx_block || !noise_tx->temp) { aom_noise_tx_free(noise_tx); return NULL; } return noise_tx; }
// Allocates `size` bytes aligned to DEFAULT_ALIGNMENT by forwarding the
// request to aom_memalign(); the result of that call is returned unchanged.
void *aom_malloc(size_t size) {
  void *const mem = aom_memalign(DEFAULT_ALIGNMENT, size);
  return mem;
}
// Encodes the tiles of the current frame on a pool of workers.
//
// The worker count is AOMMIN(cpi->oxcf.max_threads, cm->tile_cols). On the
// first call (cpi->num_workers == 0) the worker array and the per-worker data
// are allocated: every slot except the last receives its own thread data,
// partition trees and frame counters and has its worker reset, while the last
// slot reuses cpi->td. On every call the per-worker state is refreshed from
// cpi, worker i is given start tile i, all workers are synced, and the
// per-thread counters are folded back into cm->counts and cpi->td.
//
// Failures are reported through CHECK_MEM_ERROR / aom_internal_error on
// cm->error.
void av1_encode_tiles_mt(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
  const int num_workers = AOMMIN(cpi->oxcf.max_threads, cm->tile_cols);
  int i;

  av1_init_tile_data(cpi);

  // One-time setup: create the workers and allocate their thread data.
  if (cpi->num_workers == 0) {
    CHECK_MEM_ERROR(cm, cpi->workers,
                    aom_malloc(num_workers * sizeof(*cpi->workers)));
    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    aom_calloc(num_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < num_workers; i++) {
      AVxWorker *const wk = &cpi->workers[i];
      EncWorkerData *const data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(wk);
      data->cpi = cpi;

      if (i >= num_workers - 1) {
        // The last slot is served by the calling thread and shares the
        // thread data embedded in cpi.
        data->td = &cpi->td;
      } else {
        // Dedicated, zero-initialised thread data for this worker.
        CHECK_MEM_ERROR(cm, data->td, aom_memalign(32, sizeof(*data->td)));
        av1_zero(*data->td);

        // Partition trees start empty and are then built.
        data->td->leaf_tree = NULL;
        data->td->pc_tree = NULL;
        av1_setup_pc_tree(cm, data->td);

        // The variance tree is only needed for variance-based partitioning.
        if (cpi->sf.partition_search_type == VAR_BASED_PARTITION)
          av1_setup_var_tree(cm, data->td);

        // Per-thread frame counters.
        CHECK_MEM_ERROR(cm, data->td->counts,
                        aom_calloc(1, sizeof(*data->td->counts)));

        // Create the thread.
        if (!winterface->reset(wk))
          aom_internal_error(&cm->error, AOM_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      }

      winterface->sync(wk);
    }
  }

  // Per-frame preparation of every worker.
  for (i = 0; i < num_workers; i++) {
    AVxWorker *const wk = &cpi->workers[i];
    EncWorkerData *const data = &cpi->tile_thr_data[i];

    wk->hook = (AVxWorkerHook)enc_worker_hook;
    wk->data1 = data;
    wk->data2 = NULL;

    // Seed the worker's private state from cpi before encoding the frame.
    if (data->td != &cpi->td) {
      data->td->mb = cpi->td.mb;
      data->td->rd_counts = cpi->td.rd_counts;
    }
    if (data->td->counts != &cm->counts) {
      memcpy(data->td->counts, &cm->counts, sizeof(cm->counts));
    }

#if CONFIG_PALETTE
    // Buffers used by the palette coding mode.
    // NOTE(review): this allocation happens on every call and nothing in this
    // function frees the previous buffer -- confirm it is released elsewhere.
    if (cpi->common.allow_screen_content_tools && i < num_workers - 1) {
      MACROBLOCK *x = &data->td->mb;
      CHECK_MEM_ERROR(cm, x->palette_buffer,
                      aom_memalign(16, sizeof(*x->palette_buffer)));
    }
#endif  // CONFIG_PALETTE
  }

  // Encode the frame: worker i starts at tile i.
  // NOTE(review): the checks below use cpi->num_workers whereas the loops are
  // bounded by num_workers -- confirm the two always agree.
  for (i = 0; i < num_workers; i++) {
    AVxWorker *const wk = &cpi->workers[i];
    EncWorkerData *const data = (EncWorkerData *)wk->data1;

    data->start = i;
    if (i == cpi->num_workers - 1) {
      winterface->execute(wk);
    } else {
      winterface->launch(wk);
    }
  }

  // Wait for every worker to finish.
  for (i = 0; i < num_workers; i++) {
    winterface->sync(&cpi->workers[i]);
  }

  // Merge the counters gathered by the workers that own private thread data.
  for (i = 0; i < num_workers; i++) {
    AVxWorker *const wk = &cpi->workers[i];
    EncWorkerData *const data = (EncWorkerData *)wk->data1;

    if (i >= cpi->num_workers - 1) continue;

    av1_accumulate_frame_counts(&cm->counts, data->td->counts);
    accumulate_rd_opt(&cpi->td, data->td);
  }
}
// Allocates and initialises a decoder instance that uses `pool` as its
// buffer pool.
//
// Returns the new decoder, or NULL when the decoder itself cannot be
// allocated. While cm->error.setjmp is set, an error raised through
// CHECK_MEM_ERROR lands in the setjmp branch below, which tears the decoder
// down with av1_decoder_remove() and returns NULL.
AV1Decoder *av1_decoder_create(BufferPool *const pool) {
  AV1Decoder *volatile const pbi = aom_memalign(32, sizeof(*pbi));
  AV1_COMMON *volatile const cm = (pbi != NULL) ? &pbi->common : NULL;

  if (cm == NULL) return NULL;

  av1_zero(*pbi);

  // Error landing pad for the allocations below.
  if (setjmp(cm->error.jmp) != 0) {
    cm->error.setjmp = 0;
    av1_decoder_remove(pbi);
    return NULL;
  }
  cm->error.setjmp = 1;

  // Frame contexts: the current one plus the FRAME_CONTEXTS saved ones.
  CHECK_MEM_ERROR(cm, cm->fc,
                  (FRAME_CONTEXT *)aom_memalign(32, sizeof(*cm->fc)));
  CHECK_MEM_ERROR(cm, cm->frame_contexts,
                  (FRAME_CONTEXT *)aom_memalign(
                      32, FRAME_CONTEXTS * sizeof(*cm->frame_contexts)));
  memset(cm->fc, 0, sizeof(*cm->fc));
  memset(cm->frame_contexts, 0,
         FRAME_CONTEXTS * sizeof(*cm->frame_contexts));

  once(initialize_dec);

  // Decoder-level state.
  pbi->need_resync = 1;
  pbi->ready_for_new_data = 1;

  // Initialize the references to not point to any frame buffers.
  memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
  memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map));

  // Common state: frame counter, buffer pool and bit depths.
  cm->current_video_frame = 0;
  cm->buffer_pool = pool;
  cm->bit_depth = AOM_BITS_8;
  cm->dequant_bit_depth = AOM_BITS_8;

  // Decoder-specific mode-info callbacks.
  cm->alloc_mi = av1_dec_alloc_mi;
  cm->free_mi = av1_dec_free_mi;
  cm->setup_mi = av1_dec_setup_mi;

  av1_loop_filter_init(cm);

#if CONFIG_NCOBMC_ADAPT_WEIGHT
  get_default_ncobmc_kernels(cm);
#endif  // CONFIG_NCOBMC_ADAPT_WEIGHT

#if CONFIG_AOM_QM
  aom_qm_init(cm);
#endif

#if CONFIG_LOOP_RESTORATION
  av1_loop_restoration_precal();
#endif  // CONFIG_LOOP_RESTORATION

#if CONFIG_ACCOUNTING
  pbi->acct_enabled = 1;
  aom_accounting_init(&pbi->accounting);
#endif

  // Disarm the jump target before returning to the caller.
  cm->error.setjmp = 0;

  aom_get_worker_interface()->init(&pbi->lf_worker);

  return pbi;
}
int aom_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int ss_x, int ss_y, #if CONFIG_HIGHBITDEPTH int use_highbitdepth, #endif int border, int byte_alignment, aom_codec_frame_buffer_t *fb, aom_get_frame_buffer_cb_fn_t cb, void *cb_priv) { if (ybf) { const int aom_byte_align = (byte_alignment == 0) ? 1 : byte_alignment; const int aligned_width = (width + 7) & ~7; const int aligned_height = (height + 7) & ~7; const int y_stride = ((aligned_width + 2 * border) + 31) & ~31; const uint64_t yplane_size = (aligned_height + 2 * border) * (uint64_t)y_stride + byte_alignment; const int uv_width = aligned_width >> ss_x; const int uv_height = aligned_height >> ss_y; const int uv_stride = y_stride >> ss_x; const int uv_border_w = border >> ss_x; const int uv_border_h = border >> ss_y; const uint64_t uvplane_size = (uv_height + 2 * uv_border_h) * (uint64_t)uv_stride + byte_alignment; #if CONFIG_HIGHBITDEPTH const uint64_t frame_size = (1 + use_highbitdepth) * (yplane_size + 2 * uvplane_size); #else const uint64_t frame_size = yplane_size + 2 * uvplane_size; #endif // CONFIG_HIGHBITDEPTH uint8_t *buf = NULL; if (cb != NULL) { const int align_addr_extra_size = 31; const uint64_t external_frame_size = frame_size + align_addr_extra_size; assert(fb != NULL); if (external_frame_size != (size_t)external_frame_size) return -1; // Allocation to hold larger frame, or first allocation. if (cb(cb_priv, (size_t)external_frame_size, fb) < 0) return -1; if (fb->data == NULL || fb->size < external_frame_size) return -1; ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32); #if defined(__has_feature) #if __has_feature(memory_sanitizer) // This memset is needed for fixing the issue of using uninitialized // value in msan test. It will cause a perf loss, so only do this for // msan test. memset(ybf->buffer_alloc, 0, (int)frame_size); #endif #endif } else if (frame_size > (size_t)ybf->buffer_alloc_sz) { // Allocation to hold larger frame, or first allocation. 
aom_free(ybf->buffer_alloc); ybf->buffer_alloc = NULL; if (frame_size != (size_t)frame_size) return -1; ybf->buffer_alloc = (uint8_t *)aom_memalign(32, (size_t)frame_size); if (!ybf->buffer_alloc) return -1; ybf->buffer_alloc_sz = (size_t)frame_size; // This memset is needed for fixing valgrind error from C loop filter // due to access uninitialized memory in frame border. It could be // removed if border is totally removed. memset(ybf->buffer_alloc, 0, ybf->buffer_alloc_sz); } /* Only support allocating buffers that have a border that's a multiple * of 32. The border restriction is required to get 16-byte alignment of * the start of the chroma rows without introducing an arbitrary gap * between planes, which would break the semantics of things like * aom_img_set_rect(). */ if (border & 0x1f) return -3; ybf->y_crop_width = width; ybf->y_crop_height = height; ybf->y_width = aligned_width; ybf->y_height = aligned_height; ybf->y_stride = y_stride; ybf->uv_crop_width = (width + ss_x) >> ss_x; ybf->uv_crop_height = (height + ss_y) >> ss_y; ybf->uv_width = uv_width; ybf->uv_height = uv_height; ybf->uv_stride = uv_stride; ybf->border = border; ybf->frame_size = (size_t)frame_size; ybf->subsampling_x = ss_x; ybf->subsampling_y = ss_y; buf = ybf->buffer_alloc; #if CONFIG_HIGHBITDEPTH if (use_highbitdepth) { // Store uint16 addresses when using 16bit framebuffers buf = CONVERT_TO_BYTEPTR(ybf->buffer_alloc); ybf->flags = YV12_FLAG_HIGHBITDEPTH; } else { ybf->flags = 0; } #endif // CONFIG_HIGHBITDEPTH ybf->y_buffer = (uint8_t *)yv12_align_addr( buf + (border * y_stride) + border, aom_byte_align); ybf->u_buffer = (uint8_t *)yv12_align_addr( buf + yplane_size + (uv_border_h * uv_stride) + uv_border_w, aom_byte_align); ybf->v_buffer = (uint8_t *)yv12_align_addr(buf + yplane_size + uvplane_size + (uv_border_h * uv_stride) + uv_border_w, aom_byte_align); #if CONFIG_HIGHBITDEPTH && CONFIG_GLOBAL_MOTION if (ybf->y_buffer_8bit) { free(ybf->y_buffer_8bit); ybf->y_buffer_8bit = NULL; } 
#endif ybf->corrupted = 0; /* assume not corrupted by errors */ return 0; } return -2; }