Exemplo n.º 1
0
// Allocates the per-row synchronization state used by the loop filter.
//
// lf_sync: object to populate; its `rows`, per-row arrays and `sync_range`
//          fields are written.
// cm:      handed to CHECK_MEM_ERROR for allocation-failure reporting.
// rows:    number of entries allocated in each per-row array.
// width:   forwarded to get_sync_range() to derive `sync_range`.
void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
                           int width) {
  lf_sync->rows = rows;
#if CONFIG_MULTITHREAD
  {
    int i;

    CHECK_MEM_ERROR(cm, lf_sync->mutex_,
                    vpx_malloc(sizeof(*lf_sync->mutex_) * rows));
    // Only touch the array if it was actually allocated: CHECK_MEM_ERROR is
    // defined elsewhere and may return to the caller after a failed
    // allocation instead of unwinding.
    if (lf_sync->mutex_) {
      for (i = 0; i < rows; ++i) {
        pthread_mutex_init(&lf_sync->mutex_[i], NULL);
      }
    }

    CHECK_MEM_ERROR(cm, lf_sync->cond_,
                    vpx_malloc(sizeof(*lf_sync->cond_) * rows));
    if (lf_sync->cond_) {
      for (i = 0; i < rows; ++i) {
        pthread_cond_init(&lf_sync->cond_[i], NULL);
      }
    }
  }
#endif  // CONFIG_MULTITHREAD

  CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
                  vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));

  // Set up nsync.
  lf_sync->sync_range = get_sync_range(width);
}
Exemplo n.º 2
0
// Allocates a CYCLIC_REFRESH object together with its three per-mode-info
// maps, each holding mi_rows * mi_cols entries.
//
// `map` and `consec_zero_mv` start zero-filled; every byte of
// `last_coded_q_map` is set to MAXQ.
//
// Returns the new object, or NULL if any allocation fails. On failure every
// buffer allocated so far is released, so nothing is leaked.
CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
  size_t last_coded_q_map_size;
  size_t consec_zero_mv_size;
  CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr));
  if (cr == NULL)
    return NULL;

  cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map));
  if (cr->map == NULL)
    goto fail_cr;

  last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map);
  cr->last_coded_q_map = vpx_malloc(last_coded_q_map_size);
  if (cr->last_coded_q_map == NULL)
    goto fail_map;
  // memset() writes single bytes, so MAXQ must fit in one.
  assert(MAXQ <= 255);
  memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);

  consec_zero_mv_size = mi_rows * mi_cols * sizeof(*cr->consec_zero_mv);
  cr->consec_zero_mv = vpx_malloc(consec_zero_mv_size);
  if (cr->consec_zero_mv == NULL)
    goto fail_q_map;
  memset(cr->consec_zero_mv, 0, consec_zero_mv_size);
  return cr;

  // Unwind in reverse allocation order; each label frees only what is known
  // to have been allocated at that point.
fail_q_map:
  vpx_free(cr->last_coded_q_map);
fail_map:
  vpx_free(cr->map);
fail_cr:
  vpx_free(cr);
  return NULL;
}
Exemplo n.º 3
0
// Sets up the per-row synchronization state of `row_mt_sync`.
//
// Records `rows`, allocates one column counter per row and, in multi-threaded
// builds, one mutex and one condition variable per row. `cm` is handed to
// CHECK_MEM_ERROR for allocation-failure reporting.
void vp9_row_mt_sync_mem_alloc(VP9RowMTSync *row_mt_sync, VP9_COMMON *cm,
                               int rows) {
  row_mt_sync->rows = rows;
#if CONFIG_MULTITHREAD
  {
    int row;

    CHECK_MEM_ERROR(cm, row_mt_sync->mutex_,
                    vpx_malloc(sizeof(*row_mt_sync->mutex_) * rows));
    if (row_mt_sync->mutex_ != NULL) {
      for (row = 0; row < rows; row++)
        pthread_mutex_init(row_mt_sync->mutex_ + row, NULL);
    }

    CHECK_MEM_ERROR(cm, row_mt_sync->cond_,
                    vpx_malloc(sizeof(*row_mt_sync->cond_) * rows));
    if (row_mt_sync->cond_ != NULL) {
      for (row = 0; row < rows; row++)
        pthread_cond_init(row_mt_sync->cond_ + row, NULL);
    }
  }
#endif  // CONFIG_MULTITHREAD

  CHECK_MEM_ERROR(cm, row_mt_sync->cur_col,
                  vpx_malloc(sizeof(*row_mt_sync->cur_col) * rows));

  // The sync range is fixed at 1 here.
  row_mt_sync->sync_range = 1;
}
Exemplo n.º 4
0
// Allocates cpi->max_threads encoder workers, gives each a thread_context in
// data1, and installs encoding_thread_process as every worker's hook.
//
// All workers except the last are reset() through the worker interface; a
// failed reset is reported via vpx_internal_error(). Also allocates
// cpi->cur_sb_col (one entry per superblock row, every byte set to 0xff) and
// derives cpi->sync_range from the configured width.
void vp9_create_encoding_threads(VP9_COMP *cpi) {
  VP9_COMMON * const cm = &cpi->common;
  const VP9WorkerInterface * const winterface = vp9_get_worker_interface();
  int i;

  CHECK_MEM_ERROR(cm, cpi->enc_thread_hndl,
                  vpx_malloc(sizeof(*cpi->enc_thread_hndl) * cpi->max_threads));
  for (i = 0; i < cpi->max_threads; ++i) {
    VP9Worker * const worker = &cpi->enc_thread_hndl[i];
    winterface->init(worker);
    CHECK_MEM_ERROR(cm, worker->data1,
                    vpx_memalign(32, sizeof(thread_context)));
    worker->data2 = NULL;
    // The last worker is not reset() here.
    if (i < cpi->max_threads - 1 && !winterface->reset(worker)) {
      // This is the encoder, so the message names the encoder (it previously
      // said "decoder").
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Tile encoder thread creation failed");
    }
  }
  // set row encoding hook
  for (i = 0; i < cpi->max_threads; ++i) {
    winterface->sync(&cpi->enc_thread_hndl[i]);
    cpi->enc_thread_hndl[i].hook = (VP9WorkerHook) encoding_thread_process;
  }
  CHECK_MEM_ERROR(cm, cpi->cur_sb_col,
                  vpx_malloc(sizeof(*cpi->cur_sb_col) * cm->sb_rows));
  // init cur sb col
  vpx_memset(cpi->cur_sb_col, -1, (sizeof(*cpi->cur_sb_col) * cm->sb_rows));
  // set up nsync (currently unused).
  cpi->sync_range = get_sync_range(cpi->oxcf.width);
}
/* Allocates the token list (one TOKENLIST per macroblock row) and, in
 * multi-threaded builds, the semaphores, per-thread state and worker threads
 * used for encoding.
 *
 * Worker threads are created only when more than one core and more than one
 * thread are configured; b_multi_threaded is set to 1 in that case and left
 * at 0 otherwise.
 */
void vp8cx_create_encoder_threads(VP8_COMP *cpi)
{
    cpi->b_multi_threaded = 0;

    /* Fixed value; vp8_get_proc_core_count() is not queried. */
    cpi->processor_core_count = 32;

    CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows));

#if CONFIG_MULTITHREAD

    if (cpi->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
    {
        int idx;

        /* One fewer than the smaller of the requested thread count and the
         * core count. */
        cpi->encoding_thread_count =
            (cpi->oxcf.multi_threaded > cpi->processor_core_count)
                ? cpi->processor_core_count - 1
                : cpi->oxcf.multi_threaded - 1;

        CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * cpi->encoding_thread_count));
        CHECK_MEM_ERROR(cpi->h_event_mbrencoding, vpx_malloc(sizeof(sem_t) * cpi->encoding_thread_count));
        CHECK_MEM_ERROR(cpi->mb_row_ei, vpx_memalign(32, sizeof(MB_ROW_COMP) * cpi->encoding_thread_count));
        vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * cpi->encoding_thread_count);
        CHECK_MEM_ERROR(cpi->en_thread_data, vpx_malloc(sizeof(ENCODETHREAD_DATA) * cpi->encoding_thread_count));

        sem_init(&cpi->h_event_main, 0, 0);

        cpi->b_multi_threaded = 1;

        for (idx = 0; idx < cpi->encoding_thread_count; ++idx)
        {
            ENCODETHREAD_DATA *const slot = &cpi->en_thread_data[idx];

            sem_init(&cpi->h_event_mbrencoding[idx], 0, 0);

            /* Each worker receives its index, the compressor and its own
             * MB_ROW_COMP slot. */
            slot->ithread = idx;
            slot->ptr1 = (void *)cpi;
            slot->ptr2 = (void *)&cpi->mb_row_ei[idx];

            pthread_create(&cpi->h_encoding_thread[idx], 0, thread_encoding_proc, slot);
        }
    }

#endif
}
Exemplo n.º 6
0
// Allocates the buffers and synchronization objects held by
// `row_mt_worker_data`.
//
// num_sbs:     stored in the object and used to size the dqcoeff, eob,
//              partition and recon_map buffers.
// max_threads: number of thread_data entries, allocated only when
//              thread_data is still NULL.
// num_jobs:    stored in the object; in multi-threaded builds also the number
//              of recon-sync mutexes and condition variables.
// cm:          handed to CHECK_MEM_ERROR for allocation-failure reporting.
void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
                              VP9_COMMON *cm, int num_sbs, int max_threads,
                              int num_jobs) {
  RowMTWorkerData *const wd = row_mt_worker_data;
  const size_t dqcoeff_size =
      (num_sbs << DQCOEFFS_PER_SB_LOG2) * sizeof(*wd->dqcoeff[0]);
  int plane;

  wd->num_jobs = num_jobs;
#if CONFIG_MULTITHREAD
  {
    int job;

    CHECK_MEM_ERROR(cm, wd->recon_sync_mutex,
                    vpx_malloc(sizeof(*wd->recon_sync_mutex) * num_jobs));
    if (wd->recon_sync_mutex != NULL) {
      for (job = 0; job < num_jobs; job++)
        pthread_mutex_init(wd->recon_sync_mutex + job, NULL);
    }

    CHECK_MEM_ERROR(cm, wd->recon_sync_cond,
                    vpx_malloc(sizeof(*wd->recon_sync_cond) * num_jobs));
    if (wd->recon_sync_cond != NULL) {
      for (job = 0; job < num_jobs; job++)
        pthread_cond_init(wd->recon_sync_cond + job, NULL);
    }
  }
#endif
  wd->num_sbs = num_sbs;

  // Per-plane coefficient and end-of-block buffers, all zero-initialized.
  for (plane = 0; plane < 3; plane++) {
    CHECK_MEM_ERROR(cm, wd->dqcoeff[plane], vpx_memalign(16, dqcoeff_size));
    memset(wd->dqcoeff[plane], 0, dqcoeff_size);
    CHECK_MEM_ERROR(
        cm, wd->eob[plane],
        vpx_calloc(num_sbs << EOBS_PER_SB_LOG2, sizeof(*wd->eob[plane])));
  }

  CHECK_MEM_ERROR(
      cm, wd->partition,
      vpx_calloc(num_sbs * PARTITIONS_PER_SB, sizeof(*wd->partition)));
  CHECK_MEM_ERROR(cm, wd->recon_map,
                  vpx_calloc(num_sbs, sizeof(*wd->recon_map)));

  // thread_data is kept if it already exists.
  if (wd->thread_data != NULL) return;

  {
    const size_t thread_size = max_threads * sizeof(*wd->thread_data);
    CHECK_MEM_ERROR(cm, wd->thread_data, vpx_memalign(32, thread_size));
  }
}
Exemplo n.º 7
0
// Creates the encoder worker pool and per-worker thread data on first use.
//
// Does nothing when cpi->num_workers is already non-zero. Otherwise it
// allocates `num_workers` workers (or, for SVC without row-mt, the smaller of
// oxcf.max_threads and the maximum tile-column count). Every worker but the
// last gets its own thread data and is reset() through the worker interface;
// the last one reuses cpi->td.
static void create_enc_workers(VP9_COMP *cpi, int num_workers) {
  VP9_COMMON *const cm = &cpi->common;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  int worker_total = num_workers;
  int idx;

  if (cpi->num_workers != 0) return;

  // While using SVC, we need to allocate threads according to the highest
  // resolution. When row based multithreading is enabled, it is OK to
  // allocate more threads than the number of max tile columns.
  if (cpi->use_svc && !cpi->row_mt) {
    const int max_tile_cols = get_max_tile_cols(cpi);
    worker_total = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
  }

  CHECK_MEM_ERROR(cm, cpi->workers,
                  vpx_malloc(worker_total * sizeof(*cpi->workers)));

  CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                  vpx_calloc(worker_total, sizeof(*cpi->tile_thr_data)));

  for (idx = 0; idx < worker_total; ++idx) {
    VPxWorker *const worker = cpi->workers + idx;
    EncWorkerData *const thread_data = cpi->tile_thr_data + idx;
    const int uses_main_thread_data = !(idx < worker_total - 1);

    cpi->num_workers += 1;
    winterface->init(worker);
    thread_data->cpi = cpi;

    if (uses_main_thread_data) {
      // Main thread acts as a worker and uses the thread data in cpi.
      thread_data->td = &cpi->td;
    } else {
      // Dedicated, zeroed thread data for this worker.
      CHECK_MEM_ERROR(cm, thread_data->td,
                      vpx_memalign(32, sizeof(*thread_data->td)));
      vp9_zero(*thread_data->td);

      // Clear the tree pointers, then build the pc_tree.
      thread_data->td->leaf_tree = NULL;
      thread_data->td->pc_tree = NULL;
      vp9_setup_pc_tree(cm, thread_data->td);

      // Per-thread frame counters, zero-initialized.
      CHECK_MEM_ERROR(cm, thread_data->td->counts,
                      vpx_calloc(1, sizeof(*thread_data->td->counts)));

      if (!winterface->reset(worker)) {
        vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                           "Tile encoder thread creation failed");
      }
    }

    winterface->sync(worker);
  }
}
Exemplo n.º 8
0
// Allocates a frame_dec_param and attaches it to `tsk`.
//
// On success the new object is stored in tsk->priv, tsk->dtor is set to
// task_dtor, and the object is returned. If the allocation fails, `tsk` is
// left untouched and NULL is returned. The returned memory is not
// initialized.
struct frame_dec_param *frame_dec_param_get(struct task *tsk) {
  struct frame_dec_param *const fresh = vpx_malloc(sizeof(*fresh));

  if (fresh != NULL) {
    tsk->priv = fresh;
    tsk->dtor = task_dtor;
  }

  return fresh;
}
Exemplo n.º 9
0
// Initializes the decoder context: resets the bookkeeping fields, allocates
// the buffer pool and the frame workers, creates one decoder instance per
// frame worker and starts each worker through the worker interface.
//
// Returns VPX_CODEC_OK on success and VPX_CODEC_MEM_ERROR on any failure
// (allocation, mutex/cond initialization, or worker reset).
//
// NOTE(review): the failure paths return without releasing what was
// allocated earlier in this function; presumably the caller tears down `ctx`
// afterwards - confirm.
static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) {
  int i;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();

  ctx->last_show_frame = -1;
  ctx->next_submit_worker_id = 0;
  ctx->last_submit_worker_id = 0;
  ctx->next_output_worker_id = 0;
  ctx->frame_cache_read = 0;
  ctx->frame_cache_write = 0;
  ctx->num_cache_frames = 0;
  ctx->need_resync = 1;
  // One frame worker per configured thread in frame-parallel mode, otherwise
  // a single worker; capped at MAX_DECODE_THREADS.
  ctx->num_frame_workers =
      (ctx->frame_parallel_decode == 1) ? ctx->cfg.threads: 1;
  if (ctx->num_frame_workers > MAX_DECODE_THREADS)
    ctx->num_frame_workers = MAX_DECODE_THREADS;
  ctx->available_threads = ctx->num_frame_workers;
  ctx->flushed = 0;

  ctx->buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(BufferPool));
  if (ctx->buffer_pool == NULL)
    return VPX_CODEC_MEM_ERROR;

#if CONFIG_MULTITHREAD
    if (pthread_mutex_init(&ctx->buffer_pool->pool_mutex, NULL)) {
      set_error_detail(ctx, "Failed to allocate buffer pool mutex");
      return VPX_CODEC_MEM_ERROR;
    }
#endif

  ctx->frame_workers = (VPxWorker *)
      vpx_malloc(ctx->num_frame_workers * sizeof(*ctx->frame_workers));
  if (ctx->frame_workers == NULL) {
    set_error_detail(ctx, "Failed to allocate frame_workers");
    return VPX_CODEC_MEM_ERROR;
  }

  for (i = 0; i < ctx->num_frame_workers; ++i) {
    VPxWorker *const worker = &ctx->frame_workers[i];
    FrameWorkerData *frame_worker_data = NULL;
    winterface->init(worker);
    // Each worker carries its FrameWorkerData in data1.
    worker->data1 = vpx_memalign(32, sizeof(FrameWorkerData));
    if (worker->data1 == NULL) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data");
      return VPX_CODEC_MEM_ERROR;
    }
    frame_worker_data = (FrameWorkerData *)worker->data1;
    // All workers share the same buffer pool.
    frame_worker_data->pbi = vp9_decoder_create(ctx->buffer_pool);
    if (frame_worker_data->pbi == NULL) {
      // NOTE(review): this reuses the frame_worker_data message although the
      // failing call is vp9_decoder_create().
      set_error_detail(ctx, "Failed to allocate frame_worker_data");
      return VPX_CODEC_MEM_ERROR;
    }
    frame_worker_data->pbi->frame_worker_owner = worker;
    frame_worker_data->worker_id = i;
    frame_worker_data->scratch_buffer = NULL;
    frame_worker_data->scratch_buffer_size = 0;
    frame_worker_data->frame_context_ready = 0;
    frame_worker_data->received_frame = 0;
#if CONFIG_MULTITHREAD
    if (pthread_mutex_init(&frame_worker_data->stats_mutex, NULL)) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data mutex");
      return VPX_CODEC_MEM_ERROR;
    }

    if (pthread_cond_init(&frame_worker_data->stats_cond, NULL)) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data cond");
      return VPX_CODEC_MEM_ERROR;
    }
#endif
    // If decoding in serial mode, FrameWorker thread could create tile worker
    // thread or loopfilter thread.
    frame_worker_data->pbi->max_threads =
        (ctx->frame_parallel_decode == 0) ? ctx->cfg.threads : 0;

    frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order;
    frame_worker_data->pbi->frame_parallel_decode = ctx->frame_parallel_decode;
    frame_worker_data->pbi->common.frame_parallel_decode =
        ctx->frame_parallel_decode;
    // The hook is installed before reset(), i.e. after all of the worker's
    // data above has been filled in.
    worker->hook = (VPxWorkerHook)frame_worker_hook;
    if (!winterface->reset(worker)) {
      set_error_detail(ctx, "Frame Worker thread creation failed");
      return VPX_CODEC_MEM_ERROR;
    }
  }

  // If postprocessing was enabled by the application and a
  // configuration has not been provided, default it.
  if (!ctx->postproc_cfg_set &&
      (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC))
    set_default_ppflags(&ctx->postproc_cfg);

  init_buffer_callbacks(ctx);

  return VPX_CODEC_OK;
}
Exemplo n.º 10
0
/* Unwinds a partially built encoder thread pool after a creation failure.
 *
 * threads_started:  number of encoding threads successfully created; they
 *                   occupy indices [0, threads_started).
 * sems_initialized: number of start semaphores that were sem_init()'d; this
 *                   may be one more than threads_started when pthread_create
 *                   itself failed.
 *
 * Clears b_multi_threaded, wakes and joins every started worker, destroys
 * the start semaphores and the end-of-encoding semaphore, frees the
 * per-thread arrays (leaving the pointers NULL) and resets the thread count.
 */
static void vp8cx_abort_encoder_thread_setup(VP8_COMP *cpi,
                                             int threads_started,
                                             int sems_initialized)
{
    int i;

    cpi->b_multi_threaded = 0;

    for (i = threads_started - 1; i >= 0; i--)
    {
        /* Post before joining so that a worker blocked on its start
         * semaphore can run and exit; joining without the post can block
         * forever. (Presumably the worker re-checks b_multi_threaded when
         * woken - thread_encoding_proc is not visible here.) */
        sem_post(&cpi->h_event_start_encoding[i]);
        pthread_join(cpi->h_encoding_thread[i], 0);
    }

    for (i = sems_initialized - 1; i >= 0; i--)
        sem_destroy(&cpi->h_event_start_encoding[i]);

    sem_destroy(&cpi->h_event_end_encoding);

    /* free thread related resources; NULL the pointers so later cleanup
     * cannot free or use them again */
    vpx_free(cpi->h_event_start_encoding);
    cpi->h_event_start_encoding = NULL;
    vpx_free(cpi->h_encoding_thread);
    cpi->h_encoding_thread = NULL;
    vpx_free(cpi->mb_row_ei);
    cpi->mb_row_ei = NULL;
    vpx_free(cpi->en_thread_data);
    cpi->en_thread_data = NULL;

    cpi->encoding_thread_count = 0;
}

/* Creates the encoder worker threads and the loop-filter thread.
 *
 * Threads are created only when more than one core is available and more
 * than one thread was requested. The worker count is the requested count
 * minus one, limited by the core count and by what the sync range allows.
 *
 * Returns 0 on success (including the case where no threads are needed),
 * -1 if an encoding thread could not be created and -2 if the loop-filter
 * thread could not be created. On failure all threads created so far are
 * shut down and their resources released.
 */
int vp8cx_create_encoder_threads(VP8_COMP *cpi)
{
    const VP8_COMMON * cm = &cpi->common;

    cpi->b_multi_threaded = 0;
    cpi->encoding_thread_count = 0;
    cpi->b_lpf_running = 0;

    if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
    {
        int ithread;
        int th_count = cpi->oxcf.multi_threaded - 1;
        int rc = 0;

        /* don't allocate more threads than cores available */
        if (cpi->oxcf.multi_threaded > cm->processor_core_count)
            th_count = cm->processor_core_count - 1;

        /* we have th_count + 1 (main) threads processing one row each */
        /* no point to have more threads than the sync range allows */
        if(th_count > ((cm->mb_cols / cpi->mt_sync_range) - 1))
        {
            th_count = (cm->mb_cols / cpi->mt_sync_range) - 1;
        }

        if(th_count == 0)
            return 0;

        CHECK_MEM_ERROR(cpi->h_encoding_thread,
                        vpx_malloc(sizeof(pthread_t) * th_count));
        CHECK_MEM_ERROR(cpi->h_event_start_encoding,
                        vpx_malloc(sizeof(sem_t) * th_count));
        CHECK_MEM_ERROR(cpi->mb_row_ei,
                        vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
        vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
        CHECK_MEM_ERROR(cpi->en_thread_data,
                        vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));

        sem_init(&cpi->h_event_end_encoding, 0, 0);

        cpi->b_multi_threaded = 1;
        cpi->encoding_thread_count = th_count;

        for (ithread = 0; ithread < th_count; ithread++)
        {
            ENCODETHREAD_DATA *ethd = &cpi->en_thread_data[ithread];

            /* Setup block ptrs and offsets */
            vp8_setup_block_ptrs(&cpi->mb_row_ei[ithread].mb);
            vp8_setup_block_dptrs(&cpi->mb_row_ei[ithread].mb.e_mbd);

            sem_init(&cpi->h_event_start_encoding[ithread], 0, 0);

            ethd->ithread = ithread;
            ethd->ptr1 = (void *)cpi;
            ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread];

            rc = pthread_create(&cpi->h_encoding_thread[ithread], 0,
                                thread_encoding_proc, ethd);
            if(rc)
                break;
        }

        if(rc)
        {
            /* Threads [0, ithread) are running; the semaphore at index
             * ithread was initialized but its thread never started. */
            vp8cx_abort_encoder_thread_setup(cpi, ithread, ithread + 1);

            return -1;
        }


        {
            LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data;

            sem_init(&cpi->h_event_start_lpf, 0, 0);
            sem_init(&cpi->h_event_end_lpf, 0, 0);

            lpfthd->ptr1 = (void *)cpi;
            rc = pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter,
                                lpfthd);

            if(rc)
            {
                /* All th_count encoding threads (ithread == th_count after
                 * the loop above) are running and must be shut down. */
                vp8cx_abort_encoder_thread_setup(cpi, ithread, ithread);

                sem_destroy(&cpi->h_event_end_lpf);
                sem_destroy(&cpi->h_event_start_lpf);

                return -2;
            }
        }
    }
    return 0;
}
Exemplo n.º 11
0
// Initializes the SVC state in cpi->svc and the layer context of every
// (spatial, temporal) layer pair: resets counters, seeds the per-layer rate
// control from the encoder config, and allocates the cyclic-refresh maps for
// the base temporal layer when more than one spatial layer is configured.
// Allocation failures are reported through CHECK_MEM_ERROR /
// vpx_internal_error.
void vp9_init_layer_context(VP9_COMP *const cpi) {
  SVC *const svc = &cpi->svc;
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  int mi_rows = cpi->common.mi_rows;
  int mi_cols = cpi->common.mi_cols;
  int sl, tl, i;
  // Next free reference index handed out to layers with auto-ARF enabled;
  // starts just past the spatial layers.
  int alt_ref_idx = svc->number_spatial_layers;

  svc->spatial_layer_id = 0;
  svc->temporal_layer_id = 0;
  svc->first_spatial_layer_to_encode = 0;
  svc->rc_drop_superframe = 0;
  svc->force_zero_mode_spatial_ref = 0;
  svc->use_base_mv = 0;
  svc->scaled_temp_is_alloc = 0;
  svc->scaled_one_half = 0;
  svc->current_superframe = 0;
  for (i = 0; i < REF_FRAMES; ++i)
    svc->ref_frame_index[i] = -1;
  // Default external reference configuration per spatial layer: no flags,
  // and buffer slots 0/1/2 for last/golden/alt-ref.
  for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
    cpi->svc.ext_frame_flags[sl] = 0;
    cpi->svc.ext_lst_fb_idx[sl] = 0;
    cpi->svc.ext_gld_fb_idx[sl] = 1;
    cpi->svc.ext_alt_fb_idx[sl] = 2;
  }

  // Two-pass, non-error-resilient encoding gets a small "empty" frame whose
  // buffer is filled with 0x80.
  if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
    if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img,
                                 SMALL_FRAME_WIDTH, SMALL_FRAME_HEIGHT,
                                 cpi->common.subsampling_x,
                                 cpi->common.subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                                 cpi->common.use_highbitdepth,
#endif
                                 VP9_ENC_BORDER_IN_PIXELS,
                                 cpi->common.byte_alignment,
                                 NULL, NULL, NULL))
      vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
                         "Failed to allocate empty frame for multiple frame "
                         "contexts");

    // NOTE(review): this runs unconditionally after the error call above;
    // it is safe only if vpx_internal_error() does not return - confirm.
    memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80,
           cpi->svc.empty_frame.img.buffer_alloc_sz);
  }

  for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
    for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
      int layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers);
      LAYER_CONTEXT *const lc = &svc->layer_context[layer];
      RATE_CONTROL *const lrc = &lc->rc;
      // NOTE(review): this `i` shadows the function-level `i` declared above.
      int i;
      lc->current_video_frame_in_layer = 0;
      lc->layer_size = 0;
      lc->frames_from_key_frame = 0;
      lc->last_frame_type = FRAME_TYPES;
      lrc->ni_av_qi = oxcf->worst_allowed_q;
      lrc->total_actual_bits = 0;
      lrc->total_target_vs_actual = 0;
      lrc->ni_tot_qi = 0;
      lrc->tot_q = 0.0;
      lrc->avg_q = 0.0;
      lrc->ni_frames = 0;
      lrc->decimation_count = 0;
      lrc->decimation_factor = 0;

      for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
        lrc->rate_correction_factors[i] = 1.0;
      }

      // CBR seeds the q history from the worst allowed q; other modes use
      // the best allowed q for last_q and the worst/best midpoint for the
      // average, and also assign the layer's alt-ref / golden indices.
      if (cpi->oxcf.rc_mode == VPX_CBR) {
        lc->target_bandwidth = oxcf->layer_target_bitrate[layer];
        lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q;
        lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;
        lrc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q;
      } else {
        lc->target_bandwidth = oxcf->layer_target_bitrate[layer];
        lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q;
        lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q;
        lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q +
                                            oxcf->best_allowed_q) / 2;
        lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q +
                                              oxcf->best_allowed_q) / 2;
        if (oxcf->ss_enable_auto_arf[sl])
          lc->alt_ref_idx = alt_ref_idx++;
        else
          lc->alt_ref_idx = INVALID_IDX;
        lc->gold_ref_idx = INVALID_IDX;
      }

      // starting_buffer_level_ms is scaled by the bandwidth and divided by
      // 1000 (ms -> s).
      lrc->buffer_level = oxcf->starting_buffer_level_ms *
                              lc->target_bandwidth / 1000;
      lrc->bits_off_target = lrc->buffer_level;

      // Initialize the cyclic refresh parameters. If spatial layers are used
      // (i.e., ss_number_layers > 1), these need to be updated per spatial
      // layer.
      // Cyclic refresh is only applied on base temporal layer.
      if (oxcf->ss_number_layers > 1 &&
          tl == 0) {
        size_t last_coded_q_map_size;
        size_t consec_zero_mv_size;
        VP9_COMMON *const cm = &cpi->common;
        lc->sb_index = 0;
        CHECK_MEM_ERROR(cm, lc->map,
                        vpx_malloc(mi_rows * mi_cols * sizeof(*lc->map)));
        // NOTE(review): the byte count here omits sizeof(*lc->map), unlike
        // the allocation above; correct only if map elements are one byte
        // wide - confirm.
        memset(lc->map, 0, mi_rows * mi_cols);
        last_coded_q_map_size = mi_rows * mi_cols *
                                sizeof(*lc->last_coded_q_map);
        CHECK_MEM_ERROR(cm, lc->last_coded_q_map,
                        vpx_malloc(last_coded_q_map_size));
        // memset() writes single bytes, so MAXQ must fit in one.
        assert(MAXQ <= 255);
        memset(lc->last_coded_q_map, MAXQ, last_coded_q_map_size);
        consec_zero_mv_size = mi_rows * mi_cols * sizeof(*lc->consec_zero_mv);
        CHECK_MEM_ERROR(cm, lc->consec_zero_mv,
                        vpx_malloc(consec_zero_mv_size));
        memset(lc->consec_zero_mv, 0, consec_zero_mv_size);
       }
    }
  }

  // Still have extra buffer for base layer golden frame
  if (!(svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR)
      && alt_ref_idx < REF_FRAMES)
    svc->layer_context[0].gold_ref_idx = alt_ref_idx;
}
Exemplo n.º 12
0
/* Reads the token-partition count from the first bool decoder, splits the
 * input fragments so that each fragment pointer/size pair describes exactly
 * one partition, and starts a bool decoder for every token partition.
 *
 * pbi:              decoder instance; its fragments, fragment_sizes,
 *                   num_fragments, mbc and common.multi_token_partition
 *                   fields are updated.
 * token_part_sizes: pointer into the first fragment where the token
 *                   partition size table begins.
 */
static void setup_token_decoder(VP8D_COMP *pbi,
                                const unsigned char* token_part_sizes)
{
    /* With a single token partition the decoder in pbi->bc2 is used;
     * otherwise an array is allocated below. */
    vp8_reader *bool_decoder = &pbi->bc2;
    unsigned int partition_idx;
    int fragment_idx;
    int num_token_partitions;
    const unsigned char *first_fragment_end = pbi->fragments[0] +
                                          pbi->fragment_sizes[0];

    /* Two header bits give log2 of the partition count; the stored value is
     * only replaced when the read did not hit a bool-decoder error. */
    TOKEN_PARTITION multi_token_partition =
            (TOKEN_PARTITION)vp8_read_literal(&pbi->bc, 2);
    if (!vp8dx_bool_error(&pbi->bc))
        pbi->common.multi_token_partition = multi_token_partition;
    num_token_partitions = 1 << pbi->common.multi_token_partition;
    if (num_token_partitions > 1)
    {
        /* NOTE(review): any previous pbi->mbc is overwritten here without
         * being freed in this function - confirm it is released elsewhere. */
        CHECK_MEM_ERROR(pbi->mbc, vpx_malloc(num_token_partitions *
                                             sizeof(vp8_reader)));
        bool_decoder = pbi->mbc;
    }

    /* Check for partitions within the fragments and unpack the fragments
     * so that each fragment pointer points to its corresponding partition. */
    for (fragment_idx = 0; fragment_idx < pbi->num_fragments; ++fragment_idx)
    {
        unsigned int fragment_size = pbi->fragment_sizes[fragment_idx];
        const unsigned char *fragment_end = pbi->fragments[fragment_idx] +
                                            fragment_size;
        /* Special case for handling the first partition since we have already
         * read its size. */
        if (fragment_idx == 0)
        {
            /* Size of first partition + token partition sizes element */
            ptrdiff_t ext_first_part_size = token_part_sizes -
                pbi->fragments[0] + 3 * (num_token_partitions - 1);
            /* NOTE(review): fragment_size is unsigned, so this wraps to a
             * large value if ext_first_part_size exceeds it - confirm the
             * input is validated before this point. */
            fragment_size -= ext_first_part_size;
            if (fragment_size > 0)
            {
                pbi->fragment_sizes[0] = ext_first_part_size;
                /* The fragment contains an additional partition. Move to
                 * next. */
                fragment_idx++;
                pbi->fragments[fragment_idx] = pbi->fragments[0] +
                  pbi->fragment_sizes[0];
            }
        }
        /* Split the chunk into partitions read from the bitstream */
        while (fragment_size > 0)
        {
            /* fragment_idx - 1 is the token partition index: fragment 0
             * holds the first partition, token partitions start at 1. */
            ptrdiff_t partition_size = read_available_partition_size(
                                                 pbi,
                                                 token_part_sizes,
                                                 pbi->fragments[fragment_idx],
                                                 first_fragment_end,
                                                 fragment_end,
                                                 fragment_idx - 1,
                                                 num_token_partitions);
            pbi->fragment_sizes[fragment_idx] = partition_size;
            fragment_size -= partition_size;
            assert(fragment_idx <= num_token_partitions);
            if (fragment_size > 0)
            {
                /* The fragment contains an additional partition.
                 * Move to next. */
                fragment_idx++;
                pbi->fragments[fragment_idx] =
                    pbi->fragments[fragment_idx - 1] + partition_size;
            }
        }
    }

    /* After unpacking there is one fragment for the first partition plus
     * one per token partition. */
    pbi->num_fragments = num_token_partitions + 1;

    /* Start one bool decoder per token partition (fragments 1..N). */
    for (partition_idx = 1; partition_idx < pbi->num_fragments; ++partition_idx)
    {
        if (vp8dx_start_decode(bool_decoder,
                               pbi->fragments[partition_idx],
                               pbi->fragment_sizes[partition_idx]))
            vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,
                               "Failed to allocate bool decoder %d",
                               partition_idx);

        bool_decoder++;
    }

#if CONFIG_MULTITHREAD
    /* Clamp number of decoder threads */
    if (pbi->decoding_thread_count > num_token_partitions - 1)
        pbi->decoding_thread_count = num_token_partitions - 1;
#endif
}
Exemplo n.º 13
0
// Encodes a frame's tiles with multiple workers.
//
// Uses min(oxcf.max_threads, tile columns) workers. On the first call the
// worker pool and per-worker thread data are created. Each call then copies
// the current encoder state into every worker's thread data, starts the
// workers, waits for them to finish and accumulates the per-thread counters
// back into the encoder.
void vp10_encode_tiles_mt(VP10_COMP *cpi) {
  VP10_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  vp10_init_tile_data(cpi);

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));

    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers,
                    sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      // Every worker except the last gets its own thread data and is
      // reset(); the last one reuses cpi->td (see the else branch).
      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp10_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp10_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }

      winterface->sync(worker);
    }
  }

  // Bind the hook and per-worker data, then seed each worker's thread data
  // from the encoder's current state.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *thread_data;

    worker->hook = (VPxWorkerHook)enc_worker_hook;
    worker->data1 = &cpi->tile_thr_data[i];
    worker->data2 = NULL;
    thread_data = (EncWorkerData*)worker->data1;

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    // Skip the copy when the counts pointer already refers to the shared
    // frame counts.
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }
  }

  // Encode a frame
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;

    // Set the starting tile for each thread.
    thread_data->start = i;

    // The worker at index cpi->num_workers - 1 is run with execute(), all
    // others with launch(); presumably execute() runs on the calling thread.
    // NOTE(review): the comparison uses cpi->num_workers (the count created
    // on the first call), not this call's num_workers - confirm the two
    // always match.
    if (i == cpi->num_workers - 1)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }

  // Encoding ends.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    winterface->sync(worker);
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;

    // Accumulate counters.
    // The worker at index cpi->num_workers - 1 is skipped here.
    if (i < cpi->num_workers - 1) {
      vp10_accumulate_frame_counts(cm, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
Exemplo n.º 14
0
Arquivo: dering.c Projeto: jmvalin/aom
/* Runs the deringing filter over every superblock of a frame, in place.
 *
 * The destination planes set up in xd->plane are first copied into temporary
 * od_dering_in buffers (one per plane), and the per-block skip flags are
 * copied into a flat map. Each superblock of each plane is then filtered by
 * od_dering() into a stack buffer and written back to the destination plane.
 *
 * frame        - frame buffer handed to vp10_setup_dst_planes().
 * cm           - common state: dimensions in mode-info units (mi_rows,
 *                mi_cols), bit depth and the visible mode-info grid.
 * xd           - macroblock descriptor whose plane[] array is set up from
 *                `frame` and then read/written by this function.
 * global_level - frame-level filter level; with DERING_REFINEMENT it is
 *                mapped per superblock by compute_level_from_index().
 *
 * If any temporary buffer cannot be allocated, everything allocated so far
 * is released and the frame is left unfiltered.
 * NOTE(review): callers that must signal out-of-memory may prefer an explicit
 * error report here -- confirm the desired policy.
 */
void vp10_dering_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
                       MACROBLOCKD *xd, int global_level) {
    int r, c;
    int sbr, sbc;
    int nhsb, nvsb;
    /* NULL-initialised so the cleanup path can free unconditionally. */
    od_dering_in *src[3] = {NULL, NULL, NULL};
    unsigned char *bskip;
    int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = {{0}};
    int stride;
    int bsize[3];
    int dec[3];
    int pli;
    int coeff_shift = VPXMAX(cm->bit_depth - 8, 0);
    /* Number of superblock rows/columns, rounding partial ones up. */
    nvsb = (cm->mi_rows + MI_BLOCK_SIZE - 1)/MI_BLOCK_SIZE;
    nhsb = (cm->mi_cols + MI_BLOCK_SIZE - 1)/MI_BLOCK_SIZE;
    vp10_setup_dst_planes(xd->plane, frame, 0, 0);
    bskip = vpx_malloc(sizeof(*bskip)*cm->mi_rows*cm->mi_cols);
    if (bskip == NULL) goto cleanup;
    for (pli = 0; pli < 3; pli++) {
        /* NOTE(review): only subsampling_x is consulted; the same block size
           is used for rows and columns below -- confirm for formats where
           the two subsampling factors differ. */
        dec[pli] = xd->plane[pli].subsampling_x;
        bsize[pli] = 8 >> dec[pli];
    }
    /* All planes share the luma stride inside the temporary buffers. */
    stride = bsize[0]*cm->mi_cols;
    for (pli = 0; pli < 3; pli++) {
        /* Size by the element type, not by the pointer type of src[]. The
           copy loop below indexes at most 64*mi_rows*mi_cols elements. */
        src[pli] = vpx_malloc(sizeof(*src[pli])*cm->mi_rows*cm->mi_cols*64);
        if (src[pli] == NULL) goto cleanup;
        for (r = 0; r < bsize[pli]*cm->mi_rows; ++r) {
            for (c = 0; c < bsize[pli]*cm->mi_cols; ++c) {
#if CONFIG_VPX_HIGHBITDEPTH
                if (cm->use_highbitdepth) {
                    src[pli][r * stride + c] =
                        CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
                        [r * xd->plane[pli].dst.stride + c];
                } else {
#endif
                    src[pli][r * stride + c] =
                        xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
#if CONFIG_VPX_HIGHBITDEPTH
                }
#endif
            }
        }
    }
    /* Flatten the per-block skip flags into a mi_rows x mi_cols map. */
    for (r = 0; r < cm->mi_rows; ++r) {
        for (c = 0; c < cm->mi_cols; ++c) {
            const MB_MODE_INFO *mbmi =
                &cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi;
            bskip[r * cm->mi_cols + c] = mbmi->skip;
        }
    }
    for (sbr = 0; sbr < nvsb; sbr++) {
        for (sbc = 0; sbc < nhsb; sbc++) {
            int level;
            int nhb, nvb;
            /* Blocks actually present in this superblock (edge superblocks
               may be partial). */
            nhb = VPXMIN(MI_BLOCK_SIZE, cm->mi_cols - MI_BLOCK_SIZE*sbc);
            nvb = VPXMIN(MI_BLOCK_SIZE, cm->mi_rows - MI_BLOCK_SIZE*sbr);
            for (pli = 0; pli < 3; pli++) {
                int16_t dst[MI_BLOCK_SIZE*MI_BLOCK_SIZE*8*8];
                int threshold;
#if DERING_REFINEMENT
                level = compute_level_from_index(
                            global_level,
                            cm->mi_grid_visible[MI_BLOCK_SIZE*sbr*cm->mi_stride +
                                                MI_BLOCK_SIZE*sbc]->mbmi.dering_gain);
#else
                level = global_level;
#endif
                /* FIXME: This is a temporary hack that uses more conservative
                   deringing for chroma. */
                if (pli) level = (level*5 + 4) >> 3;
                if (sb_all_skip(cm, sbr*MI_BLOCK_SIZE, sbc*MI_BLOCK_SIZE)) level = 0;
                threshold = level << coeff_shift;
                od_dering(
                    &OD_DERING_VTBL_C,
                    dst,
                    MI_BLOCK_SIZE*bsize[pli],
                    &src[pli][sbr*stride*bsize[pli]*MI_BLOCK_SIZE +
                              sbc*bsize[pli]*MI_BLOCK_SIZE],
                    stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli,
                    &bskip[MI_BLOCK_SIZE*sbr*cm->mi_cols + MI_BLOCK_SIZE*sbc],
                    cm->mi_cols, threshold, OD_DERING_NO_CHECK_OVERLAP, coeff_shift);
                /* Write the filtered superblock back to the frame plane. */
                for (r = 0; r < bsize[pli]*nvb; ++r) {
                    for (c = 0; c < bsize[pli]*nhb; ++c) {
#if CONFIG_VPX_HIGHBITDEPTH
                        if (cm->use_highbitdepth) {
                            CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
                            [xd->plane[pli].dst.stride*(bsize[pli]*MI_BLOCK_SIZE*sbr + r)
                             + sbc*bsize[pli]*MI_BLOCK_SIZE + c] =
                                 dst[r * MI_BLOCK_SIZE * bsize[pli] + c];
                        } else {
#endif
                            xd->plane[pli].dst.buf[xd->plane[pli].dst.stride*
                                                   (bsize[pli]*MI_BLOCK_SIZE*sbr + r) +
                                                   sbc*bsize[pli]*MI_BLOCK_SIZE + c] =
                                                       dst[r * MI_BLOCK_SIZE * bsize[pli] + c];
#if CONFIG_VPX_HIGHBITDEPTH
                        }
#endif
                    }
                }
            }
        }
    }
cleanup:
    /* Entries never allocated are still NULL here. */
    for (pli = 0; pli < 3; pli++) {
        vpx_free(src[pli]);
    }
    vpx_free(bskip);
}
Exemplo n.º 15
0
/* Sets up the helper threads used for multi-threaded encoding.
 *
 * Threads are created only when more than one core is reported and
 * oxcf.multi_threaded > 1. The helper-thread count starts at
 * multi_threaded - 1 and is capped at processor_core_count - 1 and at
 * (mb_cols / mt_sync_range) - 1. If no helper thread remains after capping,
 * the function returns with b_multi_threaded and encoding_thread_count
 * left at 0.
 *
 * Otherwise it allocates the per-thread arrays, initialises the semaphores,
 * starts th_count threads running thread_encoding_proc() and one thread
 * running loopfilter_thread(), and records the count in
 * cpi->encoding_thread_count.
 *
 * NOTE(review): the return values of sem_init() and pthread_create() are not
 * checked here; a failed thread creation goes unnoticed.
 */
void vp8cx_create_encoder_threads(VP8_COMP *cpi)
{
    const VP8_COMMON * cm = &cpi->common;

    cpi->b_multi_threaded = 0;
    cpi->encoding_thread_count = 0;

    if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
    {
        int ithread;
        int th_count = cpi->oxcf.multi_threaded - 1;

        /* don't allocate more threads than cores available */
        if (cpi->oxcf.multi_threaded > cm->processor_core_count)
            th_count = cm->processor_core_count - 1;

        /* we have th_count + 1 (main) threads processing one row each */
        /* no point to have more threads than the sync range allows */
        if(th_count > ((cm->mb_cols / cpi->mt_sync_range) - 1))
        {
            th_count = (cm->mb_cols / cpi->mt_sync_range) - 1;
        }

        /* The clamp above yields -1 when mb_cols < mt_sync_range; a negative
         * count must not reach the size computations below, where it would
         * turn into a huge unsigned allocation size. */
        if(th_count <= 0)
            return;

        CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * th_count));
        CHECK_MEM_ERROR(cpi->h_event_start_encoding, vpx_malloc(sizeof(sem_t) * th_count));
        CHECK_MEM_ERROR(cpi->mb_row_ei, vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
        vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
        CHECK_MEM_ERROR(cpi->en_thread_data,
                        vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));
        /* One entry per macroblock row. */
        CHECK_MEM_ERROR(cpi->mt_current_mb_col,
                        vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));

        sem_init(&cpi->h_event_end_encoding, 0, 0);

        cpi->b_multi_threaded = 1;
        cpi->encoding_thread_count = th_count;

        /* Start one encoding thread per helper slot, each with its own
         * start semaphore and MB_ROW_COMP context. */
        for (ithread = 0; ithread < th_count; ithread++)
        {
            ENCODETHREAD_DATA * ethd = &cpi->en_thread_data[ithread];

            sem_init(&cpi->h_event_start_encoding[ithread], 0, 0);
            ethd->ithread = ithread;
            ethd->ptr1 = (void *)cpi;
            ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread];

            pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, ethd);
        }

        /* Start the single loop-filter thread. */
        {
            LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data;

            sem_init(&cpi->h_event_start_lpf, 0, 0);
            sem_init(&cpi->h_event_end_lpf, 0, 0);

            lpfthd->ptr1 = (void *)cpi;
            pthread_create(&cpi->h_filter_thread, 0, loopfilter_thread, lpfthd);
        }
    }

}
Exemplo n.º 16
0
// Encodes the current frame's tiles with a pool of workers.
//
// The number of workers used per call is
// VPXMIN(cpi->oxcf.max_threads, tile_cols). The pool itself is created on
// the first call (cpi->num_workers == 0); with SVC it is sized from
// get_max_tile_cols(cpi) instead. The last worker in the pool reuses cpi->td
// and is run with execute(); all others get their own thread data and are
// run with launch(). After all used workers are synced, the counters of the
// workers with private thread data are accumulated into cm and cpi->td.
void vp9_encode_tiles_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  vp9_init_tile_data(cpi);

  // One-time creation of the worker pool and its per-worker data.
  if (cpi->num_workers == 0) {
    int allocated_workers;

    if (cpi->use_svc) {
      // With SVC the pool is sized for the highest resolution.
      const int max_tile_cols = get_max_tile_cols(cpi);
      allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
    } else {
      allocated_workers = num_workers;
    }

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));

    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers,
                    sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const new_worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      cpi->num_workers++;
      winterface->init(new_worker);

      // Every worker, threaded or not, points back at the encoder.
      thread_data->cpi = cpi;

      if (i == allocated_workers - 1) {
        // The last slot is the main thread: it works on cpi's own data.
        thread_data->td = &cpi->td;
      } else {
        // Private, zeroed thread data for a real worker thread.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp9_zero(*thread_data->td);

        // Partition-tree storage.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp9_setup_pc_tree(cm, thread_data->td);

        // Private frame counters.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Spawn the thread.
        if (!winterface->reset(new_worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      }

      winterface->sync(new_worker);
    }
  }

  // Per-frame preparation of the workers used for this frame.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const cur = &cpi->workers[i];
    EncWorkerData *const data = &cpi->tile_thr_data[i];

    cur->hook = (VPxWorkerHook)enc_worker_hook;
    cur->data1 = data;
    cur->data2 = NULL;

    // Refresh private thread data from cpi before encoding.
    if (data->td != &cpi->td) {
      data->td->mb = cpi->td.mb;
      data->td->rd_counts = cpi->td.rd_counts;
    }
    if (data->td->counts != &cpi->common.counts) {
      memcpy(data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }

    // Non-RD pick mode: point the plane buffers at the root context.
    if (cpi->sf.use_nonrd_pick_mode) {
      struct macroblock_plane *const p = data->td->mb.plane;
      struct macroblockd_plane *const pd = data->td->mb.e_mbd.plane;
      PICK_MODE_CONTEXT *ctx = &data->td->pc_root->none;
      int plane;

      for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
        p[plane].coeff = ctx->coeff_pbuf[plane][0];
        p[plane].qcoeff = ctx->qcoeff_pbuf[plane][0];
        pd[plane].dqcoeff = ctx->dqcoeff_pbuf[plane][0];
        p[plane].eobs = ctx->eobs_pbuf[plane][0];
      }
    }
  }

  // Kick off encoding: worker i starts at tile i.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const cur = &cpi->workers[i];
    EncWorkerData *const data = (EncWorkerData*)cur->data1;

    data->start = i;

    // Only the pool's last slot runs synchronously on the calling thread.
    if (i != cpi->num_workers - 1)
      winterface->launch(cur);
    else
      winterface->execute(cur);
  }

  // Wait for every worker used for this frame.
  for (i = 0; i < num_workers; i++) {
    winterface->sync(&cpi->workers[i]);
  }

  // Fold the counters of the non-main workers back into the encoder.
  for (i = 0; i < num_workers; i++) {
    EncWorkerData *const data = (EncWorkerData*)cpi->workers[i].data1;

    if (i >= cpi->num_workers - 1)
      continue;

    vp9_accumulate_frame_counts(cm, data->td->counts, 0);
    accumulate_rd_opt(&cpi->td, data->td);
  }
}