/**
 * schro_params_calculate_iwt_sizes:
 * @params: pointer to @SchroParams structure
 *
 * Derives the dimensions of the arrays used for the wavelet transform.
 * The luma and chroma picture sizes are queried from the video format
 * referenced by @params and each dimension is passed through
 * ROUND_UP_POW2() together with the transform depth stored in @params.
 *
 * The structure fields changed are: iwt_luma_width, iwt_luma_height,
 * iwt_chroma_width, iwt_chroma_height.
 */
void
schro_params_calculate_iwt_sizes (SchroParams * params)
{
  SchroVideoFormat *format = params->video_format;
  int luma_w, luma_h;
  int chroma_w, chroma_h;

  /* Luma plane. */
  schro_video_format_get_picture_luma_size (format, &luma_w, &luma_h);
  params->iwt_luma_width = ROUND_UP_POW2 (luma_w, params->transform_depth);
  params->iwt_luma_height = ROUND_UP_POW2 (luma_h, params->transform_depth);

  /* Chroma plane. */
  schro_video_format_get_picture_chroma_size (format, &chroma_w, &chroma_h);
  params->iwt_chroma_width =
      ROUND_UP_POW2 (chroma_w, params->transform_depth);
  params->iwt_chroma_height =
      ROUND_UP_POW2 (chroma_h, params->transform_depth);

  SCHRO_DEBUG ("iwt chroma size %d x %d", params->iwt_chroma_width,
      params->iwt_chroma_height);
  SCHRO_DEBUG ("iwt luma size %d x %d", params->iwt_luma_width,
      params->iwt_luma_height);
}
/*
 * Hands a buffer back to the memory domain it was obtained from.
 *
 * The slot table is searched (under the domain mutex) for a slot that is
 * both allocated and in use and whose pointer equals @ptr.  With
 * MEM_DOMAIN_ALWAYS_FREE defined the memory is released through the
 * domain's free callback and the slot is cleared; otherwise only the
 * in-use flag is dropped and the slot keeps its memory.
 *
 * Passing a pointer that matches no in-use slot trips SCHRO_ASSERT.
 */
void
schro_memory_domain_memfree (SchroMemoryDomain * domain, void *ptr)
{
  int i;
  int found = -1;

  SCHRO_ASSERT (domain != NULL);
  SCHRO_DEBUG ("free %p", ptr);

  schro_mutex_lock (domain->mutex);

  /* Locate the first live slot holding this pointer. */
  for (i = 0; i < SCHRO_MEMORY_DOMAIN_SLOTS && found < 0; i++) {
    if ((domain->slots[i].flags & SCHRO_MEMORY_DOMAIN_SLOT_ALLOCATED) &&
        (domain->slots[i].flags & SCHRO_MEMORY_DOMAIN_SLOT_IN_USE) &&
        domain->slots[i].ptr == ptr) {
      found = i;
    }
  }

  if (found >= 0) {
#ifdef MEM_DOMAIN_ALWAYS_FREE
    domain->free (domain->slots[found].ptr, domain->slots[found].size);
    domain->slots[found].flags = 0;
#else
    domain->slots[found].flags &= (~SCHRO_MEMORY_DOMAIN_SLOT_IN_USE);
#endif
  }

  schro_mutex_unlock (domain->mutex);

  if (found < 0) {
    /* The pointer did not come from this domain (or was already freed). */
    SCHRO_ASSERT (0);
  }
}
/*
 * Builds the five downsampled versions of a frame's filtered picture.
 *
 * All five frames are allocated first; entry k has the filtered frame's
 * format and its width/height passed through ROUND_UP_SHIFT() with a
 * shift of k+1.  They are then filled in order: entry 0 is downsampled
 * from the filtered frame and each later entry from the one before it.
 */
void
schro_encoder_frame_downsample (SchroEncoderFrame *frame)
{
  int level;

  SCHRO_DEBUG("downsampling frame %d", frame->frame_number);

  for (level = 0; level < 5; level++) {
    frame->downsampled_frames[level] =
      schro_frame_new_and_alloc (NULL, frame->filtered_frame->format,
          ROUND_UP_SHIFT(frame->filtered_frame->width, level+1),
          ROUND_UP_SHIFT(frame->filtered_frame->height, level+1));
  }

  schro_frame_downsample (frame->downsampled_frames[0],
      frame->filtered_frame);
  for (level = 1; level < 5; level++) {
    schro_frame_downsample (frame->downsampled_frames[level],
        frame->downsampled_frames[level - 1]);
  }
}
/*
 * Obtains a buffer of @size from a memory domain.
 *
 * Under the domain mutex the slot table is scanned twice:
 *   1. for a slot that is allocated, not in use and of exactly @size;
 *      such a slot is marked in use and its existing pointer is returned;
 *   2. otherwise for the first unallocated slot, which is marked
 *      allocated and in use and filled through the domain's alloc
 *      callback.
 *
 * If neither pass finds a slot, SCHRO_ASSERT (0) fires.
 *
 * Returns the buffer pointer.  The result is initialised to NULL so that
 * no indeterminate value can be returned should the assertion above ever
 * return instead of aborting.
 */
void *
schro_memory_domain_alloc (SchroMemoryDomain * domain, int size)
{
  int i;
  void *ptr = NULL;

  SCHRO_ASSERT (domain != NULL);

  SCHRO_DEBUG ("alloc %d", size);

  schro_mutex_lock (domain->mutex);

  /* Pass 1: reuse an idle slot of the same size. */
  for (i = 0; i < SCHRO_MEMORY_DOMAIN_SLOTS; i++) {
    if (!(domain->slots[i].flags & SCHRO_MEMORY_DOMAIN_SLOT_ALLOCATED)) {
      continue;
    }
    if (domain->slots[i].flags & SCHRO_MEMORY_DOMAIN_SLOT_IN_USE) {
      continue;
    }
    if (domain->slots[i].size == size) {
      domain->slots[i].flags |= SCHRO_MEMORY_DOMAIN_SLOT_IN_USE;
      SCHRO_DEBUG ("got %p", domain->slots[i].ptr);
      ptr = domain->slots[i].ptr;
      goto done;
    }
  }

  /* Pass 2: claim the first slot that has never been allocated. */
  for (i = 0; i < SCHRO_MEMORY_DOMAIN_SLOTS; i++) {
    if (domain->slots[i].flags & SCHRO_MEMORY_DOMAIN_SLOT_ALLOCATED) {
      continue;
    }

    domain->slots[i].flags |= SCHRO_MEMORY_DOMAIN_SLOT_ALLOCATED;
    domain->slots[i].flags |= SCHRO_MEMORY_DOMAIN_SLOT_IN_USE;
    domain->slots[i].size = size;
    domain->slots[i].ptr = domain->alloc (size);
    SCHRO_DEBUG ("created %p", domain->slots[i].ptr);
    ptr = domain->slots[i].ptr;
    goto done;
  }

  /* Slot table exhausted. */
  SCHRO_ASSERT (0);

done:
  schro_mutex_unlock (domain->mutex);
  return ptr;
}
/*
 * Signals the per-thread event of every worker thread in @async
 * (SetEvent on threads[0 .. n_threads-1]).
 */
void
schro_async_signal_scheduler (SchroAsync *async)
{
  int idx = 0;

  SCHRO_DEBUG("signal scheduler");

  while (idx < async->n_threads) {
    SetEvent (async->threads[idx].event);
    idx++;
  }
}
/*
 * Chooses quantiser indices directly from the encoder's noise threshold.
 *
 * A noise amplitude of 255 * 0.1^(noise_threshold * 0.05) is scaled by the
 * per-subband weight (for the frame's wavelet and transform depth) and
 * converted with schro_utils_multiplier_to_quant_index(); the same index
 * is used for all three components.  The quantisation matrix entry of
 * each subband is derived from the reciprocal of its weight.
 */
void
schro_encoder_choose_quantisers_simple (SchroEncoderFrame *frame)
{
  SchroParams *params = &frame->params;
  /* Reference value the weights are divided into; fixed at 1.0. */
  const double max = 1.0;
  double noise_amplitude;
  double *weights;
  int n_subbands;
  int component;
  int band;

  noise_amplitude = 255.0 * pow(0.1, frame->encoder->noise_threshold*0.05);
  SCHRO_DEBUG("noise %g", noise_amplitude);

  weights = frame->encoder->subband_weights[params->wavelet_filter_index]
    [params->transform_depth-1];
  n_subbands = 1 + 3*params->transform_depth;

  for (component = 0; component < 3; component++) {
    for (band = 0; band < n_subbands; band++) {
      double multiplier = noise_amplitude * weights[band];

      frame->quant_index[component][band] =
        schro_utils_multiplier_to_quant_index (multiplier);
    }
  }

  for (band = 0; band < n_subbands; band++) {
    params->quant_matrix[band] =
      schro_utils_multiplier_to_quant_index (max/weights[band]);
    SCHRO_DEBUG("%g %g %d", weights[band], max/weights[band],
        params->quant_matrix[band]);
  }
}
/*
 * Writes one low-delay slice into frame->pack.
 *
 * Layout written, in order:
 *   - 7 bits: base_index
 *   - ilog2up(8*slice_bytes) bits: luma length
 *     (slice_y_bits - slice_y_trailing_zeros)
 *   - the luma coefficients, minus the trailing zeros
 *   - the chroma coefficients, written as interleaved pairs taken from
 *     the two halves of the chroma region of quant_data
 *   - '1' bits as padding up to slice_bytes*8 bits
 *
 * The number of bits written before padding is asserted against the
 * expected total, and overrunning the slice is a fatal error.
 *
 * slice_x and slice_y are not used here.
 *
 * Returns the number of bits written before padding.
 */
static int
schro_encoder_encode_slice (SchroEncoderFrame *frame,
    SchroLowDelay *lowdelay, int slice_x, int slice_y,
    int slice_bytes, int base_index)
{
  int16_t *coeffs = frame->quant_data;
  int length_bits;
  int start_bits;
  int end_bits;
  int luma_count;
  int chroma_count;
  int k;

  start_bits = schro_pack_get_bit_offset (frame->pack);

  /* Slice header. */
  schro_pack_encode_bits (frame->pack, 7, base_index);
  length_bits = ilog2up(8*slice_bytes);
  schro_pack_encode_bits (frame->pack, length_bits,
      frame->slice_y_bits - frame->slice_y_trailing_zeros);

  /* Luma coefficients. */
  luma_count = lowdelay->slice_y_size - frame->slice_y_trailing_zeros;
  for (k = 0; k < luma_count; k++) {
    schro_pack_encode_sint (frame->pack, coeffs[k]);
  }

  /* Chroma coefficients, one value from each half per step. */
  coeffs += lowdelay->slice_y_size;
  chroma_count = lowdelay->slice_uv_size - frame->slice_uv_trailing_zeros/2;
  for (k = 0; k < chroma_count; k++) {
    schro_pack_encode_sint (frame->pack, coeffs[k]);
    schro_pack_encode_sint (frame->pack, coeffs[k+lowdelay->slice_uv_size]);
  }

  end_bits = schro_pack_get_bit_offset (frame->pack);

  SCHRO_DEBUG("total bits %d used bits %d expected %d", slice_bytes*8,
      end_bits - start_bits,
      7 + length_bits + frame->slice_y_bits + frame->slice_uv_bits -
      frame->slice_y_trailing_zeros - frame->slice_uv_trailing_zeros);
  SCHRO_ASSERT(end_bits - start_bits == 7 + length_bits + frame->slice_y_bits + frame->slice_uv_bits - frame->slice_y_trailing_zeros - frame->slice_uv_trailing_zeros);

  if (end_bits - start_bits > slice_bytes*8) {
    SCHRO_ERROR("slice overran buffer by %d bits (slice_bytes %d base_index %d)",
        end_bits - start_bits - slice_bytes*8, slice_bytes, base_index);
    SCHRO_ASSERT(0);
  } else {
    /* Pad the remainder of the slice with one-bits. */
    int padding = slice_bytes*8 - (end_bits - start_bits);

    for (k = 0; k < padding; k++) {
      schro_pack_encode_bit (frame->pack, 1);
    }
  }

  return end_bits - start_bits;
}
/**
 * schro_params_calculate_mc_sizes:
 * @params: pointer to @SchroParams structure
 *
 * Calculates the block grid used for motion compensation from the luma
 * picture size of the current video format and the block parameters in
 * the @params structure.  The block counts are rounded up to a multiple
 * of four in each direction; the offsets are half the difference between
 * block length and block separation.
 *
 * The structure fields changed are: x_num_blocks, y_num_blocks,
 * x_offset, y_offset.
 */
void
schro_params_calculate_mc_sizes (SchroParams * params)
{
  int luma_w;
  int luma_h;

  schro_video_format_get_picture_luma_size (params->video_format,
      &luma_w, &luma_h);

  params->x_num_blocks = 4 * DIVIDE_ROUND_UP (luma_w, 4 * params->xbsep_luma);
  params->y_num_blocks = 4 * DIVIDE_ROUND_UP (luma_h, 4 * params->ybsep_luma);

  SCHRO_DEBUG ("picture %dx%d, num_blocks %dx%d", luma_w, luma_h,
      params->x_num_blocks, params->y_num_blocks);

  params->x_offset = (params->xblen_luma - params->xbsep_luma) / 2;
  params->y_offset = (params->yblen_luma - params->ybsep_luma) / 2;
}
/*
 * Sets frame->base_lambda from a fixed error target.
 *
 * The subband histograms and estimate tables are generated first.  The
 * target is 255 * 0.1^(noise_threshold * 0.05), multiplied by the video
 * format's width * height, and is converted to a lambda with
 * schro_encoder_error_to_lambda().
 */
void
schro_encoder_choose_quantisers_constant_error (SchroEncoderFrame *frame)
{
  double target_error;
  double lambda;

  schro_encoder_generate_subband_histograms (frame);
  schro_encoder_calc_estimates (frame);

  SCHRO_ASSERT(frame->have_estimate_tables);

  target_error = 255.0 * pow(0.1, frame->encoder->noise_threshold*0.05);
  target_error = target_error *
    (frame->params.video_format->width * frame->params.video_format->height);

  lambda = schro_encoder_error_to_lambda (frame, target_error);
  frame->base_lambda = lambda;

  SCHRO_DEBUG("LAMBDA: %d %g", frame->frame_number, lambda);
}
/*
 * Waits up to 1000 ms for the application event, releasing the async
 * critical section for the duration of the wait and re-entering it
 * afterwards.
 *
 * Returns TRUE when the event was signalled, or when the wait timed out
 * but some worker thread is still marked busy.  Returns FALSE (after
 * logging a warning and dumping the async state) when the wait timed out
 * and no worker is busy.
 */
int
schro_async_wait_locked (SchroAsync *async)
{
  DWORD status;

  LeaveCriticalSection (&async->mutex);
  status = WaitForSingleObject (async->app_event, 1000);
  EnterCriticalSection (&async->mutex);

  if (status == WAIT_TIMEOUT) {
    int idx = 0;

    /* Advance to the first busy worker, if any. */
    while (idx < async->n_threads && !async->threads[idx].busy) {
      idx++;
    }

    if (idx == async->n_threads) {
      SCHRO_WARNING("timeout.  deadlock?");
      schro_async_dump (async);
      return FALSE;
    }
    if (idx < async->n_threads) {
      SCHRO_DEBUG("thread %d is busy", idx);
    }
  }

  return TRUE;
}
/*
 * Counts how the blocks of a motion field are predicted.
 *
 * Stored in @frame: stats_dc (pred_mode 0), stats_global (non-DC blocks
 * with using_global set) and stats_motion (the remaining non-DC blocks).
 * Non-DC blocks are additionally tallied by pred_mode (1, 2, anything
 * else); those tallies are only written to the debug log.
 */
void
schro_motion_calculate_stats (SchroMotion *motion, SchroEncoderFrame *frame)
{
  SchroMotionVector *mv;
  int x, y;
  int n_ref1 = 0;
  int n_ref2 = 0;
  int n_bidir = 0;

  frame->stats_dc = 0;
  frame->stats_global = 0;
  frame->stats_motion = 0;

  for (y = 0; y < motion->params->y_num_blocks; y++) {
    for (x = 0; x < motion->params->x_num_blocks; x++) {
      mv = SCHRO_MOTION_GET_BLOCK(motion,x,y);

      if (mv->pred_mode == 0) {
        frame->stats_dc++;
        continue;
      }

      if (mv->using_global) {
        frame->stats_global++;
      } else {
        frame->stats_motion++;
      }

      switch (mv->pred_mode) {
        case 1:
          n_ref1++;
          break;
        case 2:
          n_ref2++;
          break;
        default:
          n_bidir++;
          break;
      }
    }
  }

  SCHRO_DEBUG("dc %d global %d motion %d ref1 %d ref2 %d bidir %d",
      frame->stats_dc, frame->stats_global, frame->stats_motion,
      n_ref1, n_ref2, n_bidir);
}
/*
 * Fills @params with defaults chosen from the video format index.
 *
 * params->num_refs is read (not written) and selects between two sets
 * of defaults for the wavelet filter and the codeblock counts.  The luma
 * block length/separation depend on @video_format only; formats not
 * listed explicitly get the 12/8 defaults.
 *
 * Also set: transform_depth (4), mv_precision (2), picture weights,
 * codeblock_mode_index (1), have_global_motion (FALSE) and
 * picture_pred_mode (0).
 */
void
schro_params_init (SchroParams * params, int video_format)
{
  int blen;
  int bsep;
  int level;

  params->transform_depth = 4;

  /* Wavelet filter: depends on reference count and format index. */
  if (params->num_refs == 0) {
    params->wavelet_filter_index = (video_format < 11) ?
        SCHRO_WAVELET_DESLAURIERS_DUBUC_9_7 : SCHRO_WAVELET_FIDELITY;
  } else {
    params->wavelet_filter_index = (video_format < 11) ?
        SCHRO_WAVELET_LE_GALL_5_3 : SCHRO_WAVELET_DESLAURIERS_DUBUC_9_7;
  }

  /* Luma block length and separation (same in both directions). */
  switch (video_format) {
    case SCHRO_VIDEO_FORMAT_QCIF:
    case SCHRO_VIDEO_FORMAT_QSIF:
      blen = 8;
      bsep = 4;
      break;

    case SCHRO_VIDEO_FORMAT_HD720P_60:
    case SCHRO_VIDEO_FORMAT_HD720P_50:
      blen = 16;
      bsep = 12;
      break;

    case SCHRO_VIDEO_FORMAT_HD1080I_60:
    case SCHRO_VIDEO_FORMAT_HD1080I_50:
    case SCHRO_VIDEO_FORMAT_HD1080P_60:
    case SCHRO_VIDEO_FORMAT_HD1080P_50:
    case SCHRO_VIDEO_FORMAT_DC2K_24:
    case SCHRO_VIDEO_FORMAT_DC4K_24:
    case SCHRO_VIDEO_FORMAT_UHDTV_4K_60:
    case SCHRO_VIDEO_FORMAT_UHDTV_4K_50:
    case SCHRO_VIDEO_FORMAT_UHDTV_8K_60:
    case SCHRO_VIDEO_FORMAT_UHDTV_8K_50:
      blen = 24;
      bsep = 16;
      break;

    case SCHRO_VIDEO_FORMAT_CUSTOM:
    case SCHRO_VIDEO_FORMAT_SIF:
    case SCHRO_VIDEO_FORMAT_CIF:
    case SCHRO_VIDEO_FORMAT_4SIF:
    case SCHRO_VIDEO_FORMAT_4CIF:
    case SCHRO_VIDEO_FORMAT_SD480I_60:
    case SCHRO_VIDEO_FORMAT_SD576I_50:
    default:
      blen = 12;
      bsep = 8;
      break;
  }
  params->xblen_luma = blen;
  params->yblen_luma = blen;
  params->xbsep_luma = bsep;
  params->ybsep_luma = bsep;

  SCHRO_DEBUG ("schro_params_init %i %i %i %i", params->xblen_luma,
      params->yblen_luma, params->xbsep_luma, params->ybsep_luma);

  params->mv_precision = 2;
  params->picture_weight_1 = 1;
  params->picture_weight_2 = 1;
  params->picture_weight_bits = 1;

  /* Codeblock counts per transform level. */
  if (params->num_refs == 0) {
    for (level = 0; level < 3; level++) {
      params->horiz_codeblocks[level] = 1;
      params->vert_codeblocks[level] = 1;
    }
    for (level = 3; level < SCHRO_LIMIT_TRANSFORM_DEPTH + 1; level++) {
      params->horiz_codeblocks[level] = 4;
      params->vert_codeblocks[level] = 3;
    }
  } else {
    for (level = 0; level < 2; level++) {
      params->horiz_codeblocks[level] = 1;
      params->vert_codeblocks[level] = 1;
    }
    params->horiz_codeblocks[2] = 8;
    params->vert_codeblocks[2] = 6;
    for (level = 3; level < SCHRO_LIMIT_TRANSFORM_DEPTH + 1; level++) {
      params->horiz_codeblocks[level] = 12;
      params->vert_codeblocks[level] = 8;
    }
  }

  /* other initializations */
  params->codeblock_mode_index = 1;
  params->have_global_motion = FALSE;
  params->picture_pred_mode = 0;
}
/*
 * Fills encoder->subband_weights[wavelet][n_levels-1][subband] for every
 * wavelet (0 .. SCHRO_N_WAVELETS-1) and for transform depths 1 to 4.
 *
 * @encoder: supplies cycles_per_degree_horiz/vert and receives the weights.
 * @perceptual_weight: callback evaluated once per point of a
 *   CURVE_SIZE x CURVE_SIZE grid; its argument is sqrt(fv^2 + fh^2) where
 *   fv and fh are the grid indices scaled by the encoder's
 *   cycles-per-degree values.
 *
 * For each subband the weight written is
 *   1.0 / (size * sqrt(weighted_sum(h_curve, v_curve, weight)))
 * with size = (1.0/CURVE_SIZE) * 2^n_transforms.
 *
 * NOTE(review): the `if (0)` branch (matrix solve) and the `#if 0` region
 * at the end are never executed/compiled; `matrix` and `column` are only
 * referenced from the disabled branch, although `matrix` is still
 * allocated and freed.
 */
void
schro_encoder_calculate_subband_weights (SchroEncoder *encoder,
    double (*perceptual_weight)(double))
{
  int wavelet;
  int n_levels;
  double *matrix;
  int n;
  int i,j;
  double column[SCHRO_LIMIT_SUBBANDS];
  double *weight;

  matrix = schro_malloc (sizeof(double)*SCHRO_LIMIT_SUBBANDS*SCHRO_LIMIT_SUBBANDS);
  weight = schro_malloc (sizeof(double)*CURVE_SIZE*CURVE_SIZE);

  /* Tabulate the perceptual weighting function over the 2-D grid. */
  for(j=0;j<CURVE_SIZE;j++){
    for(i=0;i<CURVE_SIZE;i++){
      double fv = j*encoder->cycles_per_degree_vert*(1.0/CURVE_SIZE);
      double fh = i*encoder->cycles_per_degree_horiz*(1.0/CURVE_SIZE);

      weight[j*CURVE_SIZE+i] = perceptual_weight (sqrt(fv*fv+fh*fh));
    }
  }

  for(wavelet=0;wavelet<SCHRO_N_WAVELETS;wavelet++) {
    for(n_levels=1;n_levels<=4;n_levels++){
      const float *h_curve[SCHRO_LIMIT_SUBBANDS];
      const float *v_curve[SCHRO_LIMIT_SUBBANDS];
      int hi[SCHRO_LIMIT_SUBBANDS];
      int vi[SCHRO_LIMIT_SUBBANDS];

      /* Number of subbands for this depth. */
      n = 3*n_levels+1;

      /* Select the horizontal and vertical noise curve of each subband.
       * Bit 0 of the position picks between the two horizontal curve
       * indices for the level, bit 1 does the same vertically. */
      for(i=0;i<n;i++){
        int position = schro_subband_get_position(i);
        int n_transforms;

        n_transforms = n_levels - SCHRO_SUBBAND_SHIFT(position);
        if (position&1) {
          hi[i] = (n_transforms-1)*2;
        } else {
          hi[i] = (n_transforms-1)*2+1;
        }
        if (position&2) {
          vi[i] = (n_transforms-1)*2;
        } else {
          vi[i] = (n_transforms-1)*2+1;
        }
        h_curve[i] = schro_tables_wavelet_noise_curve[wavelet][hi[i]];
        v_curve[i] = schro_tables_wavelet_noise_curve[wavelet][vi[i]];
      }

      if (0) {
        /* Disabled alternative: build a symmetric matrix of curve dot
         * products, solve it against the weighted sums and use the
         * square roots of the solution as weights. */
        for(i=0;i<n;i++){
          column[i] = weighted_sum(h_curve[i], v_curve[i], weight);
          matrix[i*n+i] = dot_product (h_curve[i], v_curve[i],
              h_curve[i], v_curve[i], weight);
          for(j=i+1;j<n;j++) {
            matrix[i*n+j] = dot_product (h_curve[i], v_curve[i],
                h_curve[j], v_curve[j], weight);
            matrix[j*n+i] = matrix[i*n+j];
          }
        }

        solve (matrix, column, n);

        for(i=0;i<n;i++){
          if (column[i] < 0) {
            SCHRO_ERROR("BROKEN wavelet %d n_levels %d", wavelet, n_levels);
            break;
          }
        }

        SCHRO_DEBUG("wavelet %d n_levels %d", wavelet, n_levels);
        for(i=0;i<n;i++){
          SCHRO_DEBUG("%g", 1.0/sqrt(column[i]));
          encoder->subband_weights[wavelet][n_levels-1][i] = sqrt(column[i]);
        }
      } else {
        /* Active path: one independent weight per subband. */
        for(i=0;i<n;i++){
          int position = schro_subband_get_position(i);
          int n_transforms;
          double size;

          n_transforms = n_levels - SCHRO_SUBBAND_SHIFT(position);
          size = (1.0/CURVE_SIZE)*(1<<n_transforms);
          encoder->subband_weights[wavelet][n_levels-1][i] = 1.0/(size *
              sqrt(weighted_sum(h_curve[i], v_curve[i], weight)));
        }
      }
    }
  }

#if 0
  for(wavelet=0;wavelet<8;wavelet++) {
    for(n_levels=1;n_levels<=4;n_levels++){
      double alpha, beta, shift;
      double gain;

      alpha = schro_tables_wavelet_gain[wavelet][0];
      beta = schro_tables_wavelet_gain[wavelet][1];
      shift = (1<<filtershift[wavelet]);

      n = 3*n_levels+1;

      gain = shift;
      for(i=n_levels-1;i>=0;i--){
        encoder->subband_weights[wavelet][n_levels-1][1+3*i+0] =
          sqrt(alpha*beta)*gain;
        encoder->subband_weights[wavelet][n_levels-1][1+3*i+1] =
          sqrt(alpha*beta)*gain;
        encoder->subband_weights[wavelet][n_levels-1][1+3*i+2] =
          sqrt(beta*beta)*gain;
        gain *= alpha;
        gain *= shift;
      }
      encoder->subband_weights[wavelet][n_levels-1][0] = gain / shift;
      if (wavelet == 3 && n_levels == 3) {
        for(i=0;i<10;i++){
          SCHRO_ERROR("%g",
              encoder->subband_weights[wavelet][n_levels-1][i]);
        }
      }
    }
  }
#endif

  schro_free(weight);
  schro_free(matrix);
}
/*
 * Chooses frame->base_lambda from the frame's residual bit allocation.
 *
 * Steps:
 *   1. generate the subband histograms and the estimate tables;
 *   2. take the encoder's running arithmetic-context ratio (the intra
 *      value when the frame has no references, the inter value
 *      otherwise), clamp it to [0.5, 1.2] and store it in the frame;
 *   3. convert frame->allocated_residual_bits to a lambda with
 *      schro_encoder_entropy_to_lambda() and store it as base_lambda;
 *   4. call schro_encoder_lambda_to_entropy() once more with the chosen
 *      lambda (its return value is not used here).
 */
void
schro_encoder_choose_quantisers_rate_distortion (SchroEncoderFrame *frame)
{
  double ratio;
  double lambda;
  int target_bits;

  schro_encoder_generate_subband_histograms (frame);
  schro_encoder_calc_estimates (frame);

  SCHRO_ASSERT(frame->have_estimate_tables);

  ratio = frame->encoder->average_arith_context_ratio_inter;
  if (frame->num_refs == 0) {
    ratio = frame->encoder->average_arith_context_ratio_intra;
  }
  frame->estimated_arith_context_ratio = CLAMP(ratio, 0.5, 1.2);

  target_bits = frame->allocated_residual_bits;
  lambda = schro_encoder_entropy_to_lambda (frame, target_bits);
  frame->base_lambda = lambda;

  SCHRO_DEBUG("LAMBDA: %d %g %d", frame->frame_number, lambda, target_bits);

  schro_encoder_lambda_to_entropy (frame, lambda);
}
/*
 * Creates an async object and starts its worker threads (pthread flavour).
 *
 * @n_threads: number of workers; 0 selects automatically: first from the
 *   SCHRO_THREADS environment variable (must parse completely as a
 *   number), then from the platform's processor count.  Any value that is
 *   still below 1 afterwards (negative environment value, a failed
 *   sysconf() returning -1, or a negative argument) is replaced by 1 so
 *   that it never reaches the thread-table allocation.
 * @schedule, @closure: scheduling callback and its argument.
 * @complete: completion callback.
 *
 * Each worker is started while this thread holds async->mutex; the mutex
 * is taken again after every pthread_create(), so creation of the next
 * worker proceeds only once the previous one has released it.
 * NOTE(review): this hand-off has the new thread release a mutex locked
 * by the creator; confirm that is acceptable for the mutex type in use.
 *
 * If pthread_create() fails, no further workers are started and
 * async->n_threads is reduced to the number actually created.  Taking
 * the mutex again in that situation would block forever, because no new
 * thread exists to release it.
 *
 * Returns the new SchroAsync.
 */
SchroAsync *
schro_async_new (int n_threads,
    SchroAsyncScheduleFunc schedule,
    SchroAsyncCompleteFunc complete, void *closure)
{
  SchroAsync *async;
  pthread_attr_t attr;
  pthread_mutexattr_t mutexattr;
  pthread_condattr_t condattr;
  int i;

  if (n_threads == 0) {
    char *s;

    s = getenv ("SCHRO_THREADS");
    if (s && s[0]) {
      char *end;
      int n;

      n = strtoul (s, &end, 0);
      if (end[0] == 0) {
        n_threads = n;
      }
    }
    if (n_threads == 0) {
#if defined(_WIN32)
      const char *s = getenv ("NUMBER_OF_PROCESSORS");
      if (s) {
        n_threads = atoi (s);
      }
#elif defined(__APPLE__)
      {
        int mib[] = { CTL_HW, HW_NCPU };
        size_t dataSize = sizeof (int);

        if (sysctl (mib, 2, &n_threads, &dataSize, NULL, 0)) {
          n_threads = 0;
        }
      }
#else
      n_threads = sysconf (_SC_NPROCESSORS_CONF);
#endif
    }
  }
  /* Covers "could not detect" (0) as well as negative/failed values. */
  if (n_threads < 1) {
    n_threads = 1;
  }

  async = schro_malloc0 (sizeof (SchroAsync));

  SCHRO_DEBUG ("%d", n_threads);
  async->n_threads = n_threads;
  async->threads = schro_malloc0 (sizeof (SchroThread) * (n_threads + 1));

  async->stop = RUNNING;
  async->schedule = schedule;
  async->schedule_closure = closure;
  async->complete = complete;

  pthread_mutexattr_init (&mutexattr);
  pthread_mutex_init (&async->mutex, &mutexattr);
  pthread_condattr_init (&condattr);
  pthread_cond_init (&async->app_cond, &condattr);
  pthread_cond_init (&async->thread_cond, &condattr);

  if (!domain_key_inited) {
    pthread_key_create (&domain_key, NULL);
    domain_key_inited = TRUE;
  }

  pthread_attr_init (&attr);

  pthread_mutex_lock (&async->mutex);
  for (i = 0; i < n_threads; i++) {
    SchroThread *thread = async->threads + i;
    int err;

    thread->async = async;
    thread->index = i;
    thread->exec_domain = SCHRO_EXEC_DOMAIN_CPU;
    err = pthread_create (&async->threads[i].pthread, &attr,
        schro_thread_main, async->threads + i);
    if (err != 0) {
      /* The mutex is still held by this thread; stop here rather than
       * locking it a second time. */
      SCHRO_DEBUG ("creating thread %d failed (error %d)", i, err);
      async->n_threads = i;
      break;
    }
    pthread_mutex_lock (&async->mutex);
  }
  pthread_mutex_unlock (&async->mutex);

  pthread_attr_destroy (&attr);
  pthread_mutexattr_destroy (&mutexattr);
  pthread_condattr_destroy (&condattr);

  return async;
}
/*
 * Worker thread entry point (pthread flavour).
 *
 * @ptr: the SchroThread record of this worker.
 *
 * The loop runs with async->mutex held except while a task function is
 * executing or while blocked in pthread_cond_wait().  Per iteration:
 *   - if async->stop is not RUNNING: count as idle, signal app_cond, then
 *     either exit (stop == DIE) or wait on thread_cond until woken;
 *   - else if the thread is not marked busy: count as idle, wait on
 *     thread_cond, mark busy on wake-up and re-check from the top;
 *   - else call the schedule callback; if it left a task in async->task,
 *     take it, run it with the mutex released, then call the complete
 *     callback and signal app_cond.  If no task was set, mark the thread
 *     not busy and loop.
 *
 * Returns NULL when the thread exits.
 */
static void *
schro_thread_main (void *ptr)
{
  void (*func) (void *);
  void *priv;
  SchroThread *thread = ptr;
  SchroAsync *async = thread->async;
  int ret;

  /* thread starts with async->mutex locked */

  /* Record this worker's exec domain in thread-specific storage. */
  pthread_setspecific (domain_key,
      (void *) (unsigned long) thread->exec_domain);

  async->n_threads_running++;
  thread->busy = FALSE;
  while (1) {
    /* check for deaths each time */
    if (async->stop != RUNNING) {
      async->n_idle++;
      thread->busy = FALSE;
      pthread_cond_signal (&async->app_cond);
      if (async->stop == DIE) {
        async->n_threads_running--;
        pthread_mutex_unlock (&async->mutex);
        SCHRO_DEBUG ("thread %d: dying", thread->index);
        return NULL;
      }
      SCHRO_DEBUG ("thread %d: stopping (until restarted)", thread->index);
      pthread_cond_wait (&async->thread_cond, &async->mutex);
      SCHRO_DEBUG ("thread %d: resuming", thread->index);
      async->n_idle--;
      continue;
    }
    if (thread->busy == 0) {
      async->n_idle++;
      SCHRO_DEBUG ("thread %d: idle", thread->index);
      pthread_cond_wait (&async->thread_cond, &async->mutex);
      SCHRO_DEBUG ("thread %d: got signal", thread->index);
      async->n_idle--;
      thread->busy = TRUE;
      /* check for stop requests before doing work */
      continue;
    }
    if (1) {                    /* avoiding indent change */
      ret = async->schedule (async->schedule_closure, thread->exec_domain);
      /* FIXME ignoring ret */
      if (!async->task.task_func) {
        /* Nothing was scheduled: go back to idle on the next pass. */
        thread->busy = FALSE;
        continue;
      }

      thread->busy = TRUE;
      /* Claim the task and clear the shared slot before unlocking. */
      func = async->task.task_func;
      priv = async->task.priv;
      async->task.task_func = NULL;

      /* Wake one idle worker, if there is one, before running the task. */
      if (async->n_idle > 0) {
        pthread_cond_signal (&async->thread_cond);
      }
      pthread_mutex_unlock (&async->mutex);

      SCHRO_DEBUG ("thread %d: running", thread->index);
      func (priv);
      SCHRO_DEBUG ("thread %d: done", thread->index);

      pthread_mutex_lock (&async->mutex);

      async->complete (priv);

      pthread_cond_signal (&async->app_cond);
#if defined HAVE_CUDA || defined HAVE_OPENGL
      /* FIXME */
      /* This is required because we don't have a better mechanism
       * for indicating to threads in other exec domains that it is
       * their turn to run.  It's mostly harmless, although causes
       * a lot of unnecessary wakeups in some cases. */
      pthread_cond_broadcast (&async->thread_cond);
#endif

    }
  }
}
/*
 * Creates an async object and starts its worker threads (Win32 flavour).
 *
 * @n_threads: number of workers; 0 selects automatically: first from the
 *   SCHRO_THREADS environment variable (must parse completely as a
 *   number), then from NUMBER_OF_PROCESSORS.  Any value that is still
 *   below 1 afterwards (negative environment value or negative argument)
 *   is replaced by 1 so that it never reaches the thread-table
 *   allocation.
 * @schedule, @closure: scheduling callback and its argument.
 * @complete: completion callback.
 *
 * Every worker gets its own auto-reset event and is started with
 * _beginthreadex(); the critical section is entered before the first
 * worker and again after each one is started.
 * NOTE(review): the worker is written to "start with the mutex locked"
 * and leave it itself; confirm this hand-off is valid under
 * CRITICAL_SECTION ownership rules.
 *
 * Returns the new SchroAsync.
 */
SchroAsync *
schro_async_new(int n_threads,
    SchroAsyncScheduleFunc schedule,
    SchroAsyncCompleteFunc complete,
    void *closure)
{
  SchroAsync *async;
  int i;

  if (n_threads == 0) {
    char *s;

    s = getenv ("SCHRO_THREADS");
    if (s && s[0]) {
      char *end;
      int n;

      n = strtoul (s, &end, 0);
      if (end[0] == 0) {
        n_threads = n;
      }
    }
    if (n_threads == 0) {
      const char *s = getenv("NUMBER_OF_PROCESSORS");
      if (s) {
        n_threads = atoi(s);
      }
    }
  }
  /* Covers "could not detect" (0) as well as negative values. */
  if (n_threads < 1) {
    n_threads = 1;
  }

  async = schro_malloc0 (sizeof(SchroAsync));

  SCHRO_DEBUG("%d", n_threads);
  async->n_threads = n_threads;
  async->threads = schro_malloc0 (sizeof(SchroThread) * (n_threads + 1));

  async->schedule = schedule;
  async->schedule_closure = closure;
  async->complete = complete;

  InitializeCriticalSection (&async->mutex);
  async->app_event = CreateEvent (0, FALSE, FALSE, NULL);

  EnterCriticalSection (&async->mutex);
  for(i=0;i<n_threads;i++){
    SchroThread *thread = async->threads + i;
    unsigned int ignore;

    thread->event = CreateEvent (0, FALSE, FALSE, NULL);
    thread->async = async;
    thread->index = i;
    thread->exec_domain = SCHRO_EXEC_DOMAIN_CPU;
    async->threads[i].thread = (HANDLE) _beginthreadex (NULL, STACK_SIZE,
        schro_thread_main, async->threads + i, 0, &ignore);
    EnterCriticalSection (&async->mutex);
  }
  LeaveCriticalSection (&async->mutex);

  return async;
}
/*
 * Searches for the lambda at which schro_encoder_lambda_to_entropy()
 * yields approximately @entropy.
 *
 * The search works on log(lambda):
 *   1. Start at lambda = 1.
 *   2. Bracket: step log(lambda) up or down by log(100), at most five
 *      times, until the target lies between the two end points.
 *   3. If both ends give the same entropy, return the geometric midpoint.
 *      If the target is not bracketed an error is logged and the search
 *      continues regardless.
 *   4. Bisect the bracket up to 14 times; a midpoint whose entropy is
 *      above the target becomes the new upper end, otherwise the new
 *      lower end.
 *
 * NOTE(review): the bracketing logic presumes entropy does not decrease
 * as lambda grows; confirm against schro_encoder_lambda_to_entropy().
 *
 * Returns exp() of the midpoint of the final bracket.
 */
double
schro_encoder_entropy_to_lambda (SchroEncoderFrame *frame, double entropy)
{
  int j;
  double log_lambda_hi, log_lambda_lo, log_lambda_mid;
  double entropy_hi, entropy_lo, entropy_mid;

  log_lambda_hi = log(1);
  entropy_hi = schro_encoder_lambda_to_entropy (frame, exp(log_lambda_hi));
  SCHRO_DEBUG("start target=%g log_lambda=%g entropy=%g",
      entropy, log_lambda_hi, entropy_hi);

  if (entropy_hi < entropy) {
    /* Starting point is below the target: walk the upper end upwards. */
    entropy_lo = entropy_hi;
    log_lambda_lo = log_lambda_hi;

    for(j=0;j<5;j++) {
      log_lambda_hi = log_lambda_lo + log(100);
      entropy_hi = schro_encoder_lambda_to_entropy (frame, exp(log_lambda_hi));

      SCHRO_DEBUG("have: log_lambda=[%g,%g] entropy=[%g,%g] target=%g",
          log_lambda_lo, log_lambda_hi, entropy_lo, entropy_hi, entropy);
      if (entropy_hi > entropy) break;

      SCHRO_DEBUG("--> step up");

      entropy_lo = entropy_hi;
      log_lambda_lo = log_lambda_hi;
    }
    SCHRO_DEBUG("--> stopping");
  } else {
    /* Starting point is at or above the target: walk the lower end down. */
    for(j=0;j<5;j++) {
      log_lambda_lo = log_lambda_hi - log(100);
      entropy_lo = schro_encoder_lambda_to_entropy (frame, exp(log_lambda_lo));

      SCHRO_DEBUG("have: log_lambda=[%g,%g] entropy=[%g,%g] target=%g",
          log_lambda_lo, log_lambda_hi, entropy_lo, entropy_hi, entropy);

      SCHRO_DEBUG("--> step down");
      if (entropy_lo < entropy) break;

      entropy_hi = entropy_lo;
      log_lambda_hi = log_lambda_lo;
    }
    SCHRO_DEBUG("--> stopping");
  }

  if (entropy_lo == entropy_hi) {
    return exp(0.5*(log_lambda_lo + log_lambda_hi));
  }

  if (entropy_lo > entropy || entropy_hi < entropy) {
    SCHRO_ERROR("entropy not bracketed");
  }

  for(j=0;j<14;j++){
    /* Fraction of the bracket at which to probe; plain bisection. */
    double x;

    if (entropy_hi == entropy_lo) break;

    SCHRO_DEBUG("have: log_lambda=[%g,%g] entropy=[%g,%g] target=%g",
        log_lambda_lo, log_lambda_hi, entropy_lo, entropy_hi, entropy);

    x = 0.5;

    log_lambda_mid = log_lambda_lo + (log_lambda_hi - log_lambda_lo) * x;
    entropy_mid = schro_encoder_lambda_to_entropy (frame, exp(log_lambda_mid));

    SCHRO_DEBUG("picking x=%g log_lambda_mid=%g entropy=%g",
        x, log_lambda_mid, entropy_mid);

    if (entropy_mid > entropy) {
      log_lambda_hi = log_lambda_mid;
      entropy_hi = entropy_mid;
      SCHRO_DEBUG("--> focus up");
    } else {
      log_lambda_lo = log_lambda_mid;
      entropy_lo = entropy_mid;
      SCHRO_DEBUG("--> focus down");
    }
  }

  log_lambda_mid = 0.5*(log_lambda_hi + log_lambda_lo);
  SCHRO_DEBUG("done %g", exp(log_lambda_mid));
  return exp(log_lambda_mid);
}
/*
 * Fits a global motion model to a motion field and stores it in @gm.
 *
 * Blocks start out flagged as "using global", except those in the last
 * eight rows or columns.  Four refinement passes follow; each pass
 *   - averages the vectors and the block positions (in units of 8) of
 *     the flagged blocks,
 *   - computes per-axis regression coefficients a00, a01, a10, a11 and
 *     the pan term from them,
 *   - measures the mean squared residual of the flagged blocks (floored
 *     at 1) and re-flags every block whose squared residual is below 16
 *     times that value,
 *   - writes the current model into @gm (b0, b1, a_exp = 16, a00..a11),
 *     scaled using @mv_precision.
 *
 * Finally every block is flagged as using global motion and its first
 * vector is set to zero.
 *
 * NOTE(review): the regression uses block positions multiplied by 8
 * while the residuals use the plain block indices; confirm whether that
 * difference is intended.
 */
void
schro_motion_field_global_estimation (SchroMotionField *mf,
    SchroGlobalMotion *gm, int mv_precision)
{
  SchroMotionVector *vec;
  int bx;
  int by;
  int pass;

  /* Initial selection of blocks taking part in the fit. */
  for (by = 0; by < mf->y_num_blocks; by++) {
    for (bx = 0; bx < mf->x_num_blocks; bx++) {
      vec = mf->motion_vectors + by*mf->x_num_blocks + bx;

      /* HACK */
      if (by >= mf->y_num_blocks - 8 || bx >= mf->x_num_blocks - 8) {
        vec->using_global = 0;
      } else {
        vec->using_global = 1;
      }
    }
  }

  for (pass = 0; pass < 4; pass++) {
    double m_x = 0, m_y = 0;
    double m_f = 0, m_g = 0;
    double m_fx = 0, m_fy = 0, m_gx = 0, m_gy = 0;
    double m_xx = 0, m_yy = 0;
    double pan_x, pan_y;
    double ave_x, ave_y;
    double a00, a01, a10, a11;
    double sum2 = 0;
    double stddev2;
    int n = 0;

    SCHRO_DEBUG("step %d", pass);

    /* Sums for the mean vector and mean position. */
    for (by = 0; by < mf->y_num_blocks; by++) {
      for (bx = 0; bx < mf->x_num_blocks; bx++) {
        vec = mf->motion_vectors + by*mf->x_num_blocks + bx;
        if (!vec->using_global) {
          continue;
        }
        m_f += vec->dx[0];
        m_g += vec->dy[0];
        m_x += bx*8;
        m_y += by*8;
        n++;
      }
    }
    pan_x = m_f / n;
    pan_y = m_g / n;
    ave_x = m_x / n;
    ave_y = m_y / n;

    SCHRO_DEBUG("pan %f %f ave %f %f n %d", pan_x, pan_y, ave_x, ave_y, n);

    /* Cross and square sums about the means. */
    n = 0;
    for (by = 0; by < mf->y_num_blocks; by++) {
      for (bx = 0; bx < mf->x_num_blocks; bx++) {
        vec = mf->motion_vectors + by*mf->x_num_blocks + bx;
        if (!vec->using_global) {
          continue;
        }
        m_fx += (vec->dx[0] - pan_x) * (bx*8 - ave_x);
        m_fy += (vec->dx[0] - pan_x) * (by*8 - ave_y);
        m_gx += (vec->dy[0] - pan_y) * (bx*8 - ave_x);
        m_gy += (vec->dy[0] - pan_y) * (by*8 - ave_y);
        m_xx += (bx*8 - ave_x) * (bx*8 - ave_x);
        m_yy += (by*8 - ave_y) * (by*8 - ave_y);
        n++;
      }
    }
    SCHRO_DEBUG("m_fx %f m_gx %f m_xx %f n %d", m_fx, m_gx, m_xx, n);

    a00 = m_fx / m_xx;
    a01 = m_fy / m_yy;
    a10 = m_gx / m_xx;
    a11 = m_gy / m_yy;

    pan_x -= a00*ave_x + a01*ave_y;
    pan_y -= a10*ave_x + a11*ave_y;

    SCHRO_DEBUG("pan %f %f a[] %f %f %f %f", pan_x, pan_y, a00, a01, a10, a11);

    /* Squared residual over the blocks used in the fit. */
    for (by = 0; by < mf->y_num_blocks; by++) {
      for (bx = 0; bx < mf->x_num_blocks; bx++) {
        vec = mf->motion_vectors + by*mf->x_num_blocks + bx;
        if (vec->using_global) {
          double dx, dy;

          dx = vec->dx[0] - (pan_x + a00 * bx + a01 * by);
          dy = vec->dy[0] - (pan_y + a10 * bx + a11 * by);
          sum2 += dx * dx + dy * dy;
        }
      }
    }

    stddev2 = sum2/n;
    SCHRO_DEBUG("stddev %f", sqrt(sum2/n));

    if (stddev2 < 1) stddev2 = 1;

    /* Re-select: every block is tested, not only the current inliers. */
    n = 0;
    for (by = 0; by < mf->y_num_blocks; by++) {
      for (bx = 0; bx < mf->x_num_blocks; bx++) {
        double dx, dy;

        vec = mf->motion_vectors + by*mf->x_num_blocks + bx;
        dx = vec->dx[0] - (pan_x + a00 * bx + a01 * by);
        dy = vec->dy[0] - (pan_y + a10 * bx + a11 * by);
        vec->using_global = (dx * dx + dy * dy < stddev2*16);
        n += vec->using_global;
      }
    }
    SCHRO_DEBUG("using n = %d", n);

    /* Publish the model of this pass; later passes overwrite it. */
    gm->b0 = rint(pan_x*(0.125*(1<<mv_precision)));
    gm->b1 = rint(pan_y*(0.125*(1<<mv_precision)));
    gm->a_exp = 16;
    gm->a00 = rint((1.0 + a00/8) * (1<<(gm->a_exp + mv_precision)));
    gm->a01 = rint(a01/8 * (1<<(gm->a_exp + mv_precision)));
    gm->a10 = rint(a10/8 * (1<<(gm->a_exp + mv_precision)));
    gm->a11 = rint((1.0 + a11/8) * (1<<(gm->a_exp + mv_precision)));
  }

  /* Mark everything global and clear the vectors (they are not derived
   * from the fitted model here). */
  for (by = 0; by < mf->y_num_blocks; by++) {
    for (bx = 0; bx < mf->x_num_blocks; bx++) {
      vec = mf->motion_vectors + by*mf->x_num_blocks + bx;
      vec->using_global = 1;
      vec->dx[0] = 0;
      vec->dy[0] = 0;
    }
  }
}
/*
 * Worker thread entry point (Win32 flavour).
 *
 * @ptr: the SchroThread record of this worker.
 *
 * Each iteration marks the thread idle, leaves the critical section and
 * blocks on the thread's own event.  Once woken it re-enters the critical
 * section and either
 *   - exits if async->stop is set (after signalling app_event), or
 *   - calls the schedule callback; if that set async->task_func, the task
 *     is taken and run with the critical section released, after which
 *     the complete callback is invoked and app_event is signalled.
 *
 * With HAVE_CUDA defined, every worker's event is signalled after a task
 * completes.  That loop previously lacked its increment clause and
 * signalled `async->thread_event`; it now signals each thread's own
 * event, the same way schro_async_signal_scheduler() does.
 *
 * Returns 0 when the thread exits.
 */
static unsigned int __stdcall
schro_thread_main (void *ptr)
{
  void (*func)(void *);
  void *priv;
  SchroThread *thread = ptr;
  SchroAsync *async = thread->async;
  int ret;

  /* thread starts with async->mutex locked */

  /* Record this worker's exec domain in thread-local storage. */
  TlsSetValue (domain_key, (void *)(unsigned long)thread->exec_domain);

  async->n_threads_running++;
  while (1) {
    async->n_idle++;
    thread->busy = FALSE;

    LeaveCriticalSection (&async->mutex);
    SCHRO_DEBUG("thread %d: idle, waiting for event", thread->index);
    WaitForSingleObject (thread->event, INFINITE);
    SCHRO_DEBUG("thread %d: got event", thread->index);
    EnterCriticalSection (&async->mutex);

    async->n_idle--;
    thread->busy = TRUE;

    if (async->stop) {
      SetEvent (async->app_event);
      async->n_threads_running--;
      LeaveCriticalSection (&async->mutex);
      SCHRO_DEBUG("thread %d: stopping", thread->index);
      return 0;
    }

    ret = async->schedule (async->schedule_closure, thread->exec_domain);
    /* FIXME ignoring ret */
    if (!async->task_func) {
      continue;
    }

    /* Claim the task and clear the shared slot before unlocking. */
    func = async->task_func;
    priv = async->task_priv;
    async->task_func = NULL;

    LeaveCriticalSection (&async->mutex);

    SCHRO_DEBUG("thread %d: running", thread->index);
    func (priv);
    SCHRO_DEBUG("thread %d: done", thread->index);

    EnterCriticalSection (&async->mutex);

    async->complete (priv);

    SetEvent (async->app_event);
#ifdef HAVE_CUDA
    /* FIXME */
    /* This is required because we don't have a better mechanism
     * for indicating to threads in other exec domains that it is
     * their turn to run.  It's mostly harmless, although causes
     * a lot of unnecessary wakeups in some cases. */
    {
      int i;

      for (i = 0; i < async->n_threads; i++) {
        SetEvent (async->threads[i].event);
      }
    }
#endif
  }
}