/*Prepares img to hold the picture described by a parsed JPEG header.
  The image is zeroed, its dimensions and plane count are copied from the
   header, and for every component a sample buffer (data) and a coefficient
   buffer (coef) are allocated, followed by one 3-bytes-per-pixel buffer
   (pixels) for the whole image.
  img: The image to initialize; any previous contents are overwritten.
  header: The header supplying width, height, ncomps and per-component
           sampling factors and block counts.
  Return: EXIT_SUCCESS, or EXIT_FAILURE if an allocation failed, in which case
           image_clear() has already been called on img.*/
int image_init(image *img, jpeg_header *header) {
  int max_hsamp;
  int max_vsamp;
  int pli;
  memset(img, 0, sizeof(image));
  img->width = header->width;
  img->height = header->height;
  img->nplanes = header->ncomps;
  /*Pass 1: find the largest sampling factor in each direction; the per-plane
     decimation below is expressed relative to it.*/
  max_hsamp = 0;
  max_vsamp = 0;
  for (pli = 0; pli < img->nplanes; pli++) {
    jpeg_component *c;
    c = &header->comp[pli];
    max_hsamp = OD_MAXI(max_hsamp, c->hsamp);
    max_vsamp = OD_MAXI(max_vsamp, c->vsamp);
  }
  /*Pass 2: size and allocate each plane.*/
  for (pli = 0; pli < img->nplanes; pli++) {
    jpeg_component *c;
    image_plane *p;
    c = &header->comp[pli];
    p = &img->plane[pli];
    /*Plane dimensions are the block counts times 8.*/
    p->width = c->hblocks << 3;
    p->height = c->vblocks << 3;
    /* TODO support 16-bit images */
    p->xstride = 1;
    p->ystride = p->xstride*p->width;
    p->xdec = OD_ILOG(max_hsamp) - OD_ILOG(c->hsamp);
    p->ydec = OD_ILOG(max_vsamp) - OD_ILOG(c->vsamp);
    p->data = od_aligned_malloc(p->ystride*p->height, IMAGE_ALIGN);
    if (p->data == NULL) goto fail;
    p->coef = od_aligned_malloc(p->width*p->height*sizeof(short),
     IMAGE_ALIGN);
    if (p->coef == NULL) goto fail;
  }
  img->pixels = od_aligned_malloc(img->width*img->height*3, IMAGE_ALIGN);
  if (img->pixels == NULL) goto fail;
  return EXIT_SUCCESS;
fail:
  /*Single cleanup path for every allocation failure above.*/
  image_clear(img);
  return EXIT_FAILURE;
}
/** Greedy double-precision PVQ search: picks the pulse vector with k pulses
 * that best matches the direction of the input. Done in doubles so that
 * tests are not limited by numerical accuracy.
 *
 * The search works on magnitudes only; signs of the input are copied onto
 * the result at the end. At most 1024 dimensions are supported (size of the
 * local scratch array); n is not checked against that limit here.
 *
 * @param [in]      xcoeff  input vector to quantize (x in the math doc)
 * @param [in]      n       number of dimensions
 * @param [in]      k       number of pulses
 * @param [out]     ypulse  optimal codevector found (y in the math doc)
 * @return                  cosine distance between x and y (between 0 and 1)
 */
static double pvq_search_double(const double *xcoeff, int n, int k,
 od_coeff *ypulse) {
  double mag[1024];
  double energy;
  double dot;
  double norm2;
  int placed;
  int d;
  energy = dot = norm2 = 0;
  for (d = 0; d < n; d++) {
    mag[d] = fabs(xcoeff[d]);
    energy += mag[d]*mag[d];
  }
  placed = 0;
  if (k <= 2) {
    /* Too few pulses for the projection to help: start from zero. */
    for (d = 0; d < n; d++) ypulse[d] = 0;
  }
  else {
    /* Start from the input scaled to an L1 norm of k and rounded down, which
       places at most k pulses. */
    double l1;
    double inv_l1;
    l1 = 0;
    for (d = 0; d < n; d++) l1 += mag[d];
    inv_l1 = 1./OD_MAXF(l1, 1e-100);
    for (d = 0; d < n; d++) {
      ypulse[d] = OD_MAXI(0, (int)floor(k*mag[d]*inv_l1));
      dot += mag[d]*ypulse[d];
      norm2 += ypulse[d]*ypulse[d];
      placed += ypulse[d];
    }
  }
  /* Add the remaining pulses one at a time, each at the position that
     maximizes (x.y)^2/(y.y). */
  while (placed < k) {
    double best_num;
    double best_den;
    int best_pos;
    best_pos = 0;
    best_num = -10;
    best_den = 1;
    for (d = 0; d < n; d++) {
      double num;
      double den;
      num = dot + mag[d];
      den = norm2 + 2*ypulse[d] + 1;
      num *= num;
      /* Compare the ratios by cross-multiplication to avoid a division;
         position 0 is always accepted as the initial candidate. */
      if (d != 0 && !(num*best_den > best_num*den)) continue;
      best_num = num;
      best_den = den;
      best_pos = d;
    }
    dot = dot + mag[best_pos];
    norm2 = norm2 + 2*ypulse[best_pos] + 1;
    ypulse[best_pos]++;
    placed++;
  }
  /* Restore the signs of the input. */
  for (d = 0; d < n; d++) {
    if (xcoeff[d] < 0) ypulse[d] = -ypulse[d];
  }
  return dot/(1e-100 + sqrt(energy*norm2));
}
/** Derive the number of PVQ pulses for a vector from quantities that are
 * available to both the encoder and the decoder.
 *
 * @param [in]      qcg        quantized companded gain value
 * @param [in]      itheta     quantized PVQ error angle theta
 * @param [in]      theta      PVQ error angle theta
 * @param [in]      noref      indicates present or lack of reference
 *                             (prediction)
 * @param [in]      n          number of elements to be coded
 * @param [in]      beta       activity masking beta param
 * @param [in]      nodesync   do not use info that depend on the reference
 * @return                     number of pulses to use for coding
 */
int od_pvq_compute_k(double qcg, int itheta, double theta, int noref, int n,
 double beta, int nodesync) {
  int k;
  if (noref) {
    /* No prediction: K depends on the gain only. */
    if (qcg == 0) return 0;
    if (n == 15 && qcg == 1 && beta > 1.25) return 1;
    /* (n+3)/2 is an integer division. */
    k = (int)floor(.5 + (qcg - .2)*sqrt((n+3)/2)/beta);
    return OD_MAXI(1, k);
  }
  if (itheta == 0) return 0;
  /* Sets K according to gain and theta, based on the high-rate
     PVQ distortion curves (see PVQ document). Low-rate will have to be
     perceptually tuned anyway. We subtract 0.2 from the radius as an
     approximation for the fact that the coefficients aren't identically
     distributed within a band so at low gain the number of dimensions that
     are likely to have a pulse is less than n.
     (n + 2)/2 is an integer division in both formulas below. */
  if (nodesync) {
    /* Only the quantized angle index is used here; qcg, theta and beta do
       not enter the computation. */
    k = (int)floor(.5 + (itheta - .2)*sqrt((n + 2)/2));
  }
  else {
    k = (int)floor(.5 + (qcg*sin(theta) - .2)*
     sqrt((n + 2)/2)/beta);
  }
  return OD_MAXI(1, k);
}
/** Takes the base-2 log of E(x), in half-integer (Q1) steps, clamped so
 * that the result is never negative.
 *
 * @param [in]      ExQ16   expectation of x in Q16
 *
 * @retval 2*log2(ExQ16/2^16)
 */
int logEx(int ExQ16) {
  int nbits;
  int half_step;
  nbits = od_ilog(ExQ16);
  if (nbits >= 15) {
    /* Keep only the leading bits so that the square cannot overflow. */
    int top = ExQ16 >> (nbits - 8);
    half_step = top*top > (1 << 15);
  }
  else {
    /* Small enough to square directly. */
    half_step = ExQ16*ExQ16 > (2 << (2*nbits));
  }
  return OD_MAXI(0, 2*nbits - 33 + half_step);
}
int main(int _argc,char **_argv) { od_ec_enc enc; od_ec_enc enc_bak; od_ec_dec dec; long nbits; long nbits2; double entropy; int ft; int ftb; int sz; int i; int ret; unsigned int sym; unsigned int seed; unsigned char *ptr; ogg_uint32_t ptr_sz; const char *env_seed; ret=EXIT_SUCCESS; entropy=0; if(_argc>2) { fprintf(stderr,"Usage: %s [<seed>]\n",_argv[0]); return EXIT_FAILURE; } env_seed=getenv("SEED"); if(_argc>1)seed=atoi(_argv[1]); else if(env_seed)seed=atoi(env_seed); else seed=time(NULL); /*Trigger resize during termination.*/ for(ft=2; ft<1024; ft++) { for(i=0; i<ft; i++) { od_ec_enc_init(&enc,ft+i&1); od_ec_enc_uint(&enc,i,ft); nbits=od_ec_enc_tell_frac(&enc); ptr=od_ec_enc_done(&enc,&ptr_sz); od_ec_dec_init(&dec,ptr,ptr_sz); sym=od_ec_dec_uint(&dec,ft); if(sym!=(unsigned)i) { fprintf(stderr,"Decoded %i instead of %i with ft of %i.\n",sym,i,ft); ret=EXIT_FAILURE; } nbits2=od_ec_dec_tell_frac(&dec); if(nbits!=nbits2) { fprintf(stderr,"enc_tell_frac == %li, dec_tell_frac == %li\n", nbits,nbits2); ret=EXIT_FAILURE; } if(dec.error) { fprintf(stderr,"uint error decoding %i with ft of %i.\n",i,ft); ret=EXIT_FAILURE; } od_ec_enc_clear(&enc); } } /*Raw bits only w/ resize*/ for(ftb=1; ftb<17; ftb++) { for(i=0; i<(1<<ftb); i++) { od_ec_enc_init(&enc,ftb+i&1); od_ec_enc_checkpoint(&enc_bak,&enc); od_ec_enc_bits(&enc,i,ftb); od_ec_enc_rollback(&enc,&enc_bak); od_ec_enc_bits(&enc,i,ftb); ptr=od_ec_enc_done(&enc,&ptr_sz); if(ptr_sz!=(unsigned)ftb+7>>3) { fprintf(stderr,"Used %li bytes to encode %i bits directly.\n", (long)ptr_sz,ftb); ret=EXIT_FAILURE; } od_ec_dec_init(&dec,ptr,ptr_sz); sym=od_ec_dec_bits(&dec,ftb); if(sym!=(unsigned)i) { fprintf(stderr,"Decoded %i instead of %i with ftb of %i.\n",sym,i,ftb); ret=EXIT_FAILURE; } od_ec_enc_clear(&enc); } } /*Testing unsigned integer corruption*/ od_ec_enc_init(&enc,2); od_ec_enc_uint(&enc,128,129); od_ec_enc_checkpoint(&enc_bak,&enc); od_ec_enc_uint(&enc,128,129); od_ec_enc_uint(&enc,128,129); od_ec_enc_uint(&enc,128,129); 
od_ec_enc_rollback(&enc,&enc_bak); ptr=od_ec_enc_done(&enc,&ptr_sz); if(ptr_sz!=1) { fprintf(stderr,"Incorrect output size %li.\n",(long)ptr_sz); ret=EXIT_FAILURE; } for(i=0; i<256; i++) { ptr[ptr_sz-1]=i; od_ec_dec_init(&dec,ptr,ptr_sz); sym=od_ec_dec_uint(&dec,129); if(i>=228 && i!=240 && !dec.error) { fprintf(stderr,"Failed to detect uint error with %i.\n",i); ret=EXIT_FAILURE; } if(sym>=255) { fprintf(stderr,"Corrupt uint out of range %i>=255 for %d.\n",sym,i); ret=EXIT_FAILURE; } } od_ec_enc_clear(&enc); /*Testing encoding of unsigned integers.*/ od_ec_enc_init(&enc,1); for(ft=2; ft<1024; ft++) { for(i=0; i<ft; i++) { entropy+=log(ft)*M_LOG2E; od_ec_enc_checkpoint(&enc_bak,&enc); od_ec_enc_uint(&enc,0,ft); od_ec_enc_rollback(&enc,&enc_bak); od_ec_enc_uint(&enc,i,ft); od_ec_enc_checkpoint(&enc_bak,&enc); od_ec_enc_uint(&enc,1,ft); od_ec_enc_rollback(&enc,&enc_bak); } if(ft==512)ptr=od_ec_enc_done(&enc,&ptr_sz); } /*Testing encoding of raw bit values.*/ for(ftb=1; ftb<16; ftb++) { for(i=0; i<(1<<ftb); i++) { entropy+=ftb; nbits=od_ec_enc_tell(&enc); od_ec_enc_bits(&enc,i,ftb); nbits2=od_ec_enc_tell(&enc); if(nbits2-nbits!=ftb) { fprintf(stderr,"Used %li bits to encode %i bits directly.\n", nbits2-nbits,ftb); ret=EXIT_FAILURE; } } } nbits=od_ec_enc_tell_frac(&enc); ptr=od_ec_enc_done(&enc,&ptr_sz); fprintf(stderr, "Encoded %0.2f bits of entropy to %0.2f bits (%0.3f%% wasted).\n", entropy,ldexp(nbits,-3),100*(nbits-ldexp(entropy,3))/nbits); fprintf(stderr,"Packed to %li bytes.\n",(long)ptr_sz); od_ec_dec_init(&dec,ptr,ptr_sz); for(ft=2; ft<1024; ft++) { for(i=0; i<ft; i++) { sym=od_ec_dec_uint(&dec,ft); if(sym!=(unsigned)i) { fprintf(stderr,"Decoded %i instead of %i with ft of %i.\n",sym,i,ft); ret=EXIT_FAILURE; } } } for(ftb=1; ftb<16; ftb++) { for(i=0; i<(1<<ftb); i++) { sym=od_ec_dec_bits(&dec,ftb); if(sym!=(unsigned)i) { fprintf(stderr,"Decoded %i instead of %i with ftb of %i.\n",sym,i,ftb); ret=EXIT_FAILURE; } } } nbits2=od_ec_dec_tell_frac(&dec); 
if(nbits!=nbits2) { fprintf(stderr, "Reported number of bits used was %0.2f, should be %0.2f.\n", ldexp(nbits2,-3),ldexp(nbits,-3)); ret=EXIT_FAILURE; } srand(seed); fprintf(stderr,"Testing random streams... Random seed: %u (%.4X).\n", seed,rand()&65535); for(i=0; i<409600; i++) { unsigned *data; unsigned *tell; unsigned tell_bits; int j; int zeros; ft=rand()/((RAND_MAX>>(rand()%11U))+1U)+10; sz=rand()/((RAND_MAX>>(rand()%9U))+1U); data=(unsigned *)malloc(sz*sizeof(*data)); tell=(unsigned *)malloc((sz+1)*sizeof(*tell)); od_ec_enc_reset(&enc); zeros=rand()%13==0; tell[0]=od_ec_enc_tell_frac(&enc); for(j=0; j<sz; j++) { if(zeros)data[j]=0; else data[j]=rand()%ft; od_ec_enc_uint(&enc,data[j],ft); tell[j+1]=od_ec_enc_tell_frac(&enc); if(rand()&7==0) { od_ec_enc_checkpoint(&enc_bak,&enc); od_ec_enc_uint(&enc,rand()&1?0:ft-1,ft); od_ec_enc_rollback(&enc,&enc_bak); } } if(!(rand()&1)) { while(od_ec_enc_tell(&enc)&7)od_ec_enc_uint(&enc,rand()&1,2); } tell_bits=od_ec_enc_tell(&enc); ptr=od_ec_enc_done(&enc,&ptr_sz); if(tell_bits!=(unsigned)od_ec_enc_tell(&enc)) { fprintf(stderr,"od_ec_enc_tell() changed after od_ec_enc_done(): " "%u instead of %u (Random seed: %u).\n", (unsigned)od_ec_enc_tell(&enc),tell_bits,seed); ret=EXIT_FAILURE; } if(tell_bits+7>>3<ptr_sz) { fprintf(stderr,"od_ec_enc_tell() lied: " "there's %i bytes instead of %i (Random seed: %u).\n", ptr_sz,tell_bits+7>>3,seed); ret=EXIT_FAILURE; } od_ec_dec_init(&dec,ptr,ptr_sz); if(od_ec_dec_tell_frac(&dec)!=tell[0]) { fprintf(stderr,"od_ec_dec_tell() mismatch between encoder and decoder " "at symbol %i: %u instead of %u (Random seed: %u).\n", 0,(unsigned)od_ec_dec_tell_frac(&dec),tell[0],seed); ret=EXIT_FAILURE; } for(j=0; j<sz; j++) { sym=od_ec_dec_uint(&dec,ft); if(sym!=data[j]) { fprintf(stderr,"Decoded %i instead of %i with ft of %i " "at position %i of %i (Random seed: %u).\n", sym,data[j],ft,j,sz,seed); ret=EXIT_FAILURE; } if(od_ec_dec_tell_frac(&dec)!=tell[j+1]) { fprintf(stderr,"od_ec_dec_tell() mismatch 
between encoder and decoder " "at symbol %i: %u instead of %u (Random seed: %u).\n", j+1,(unsigned)od_ec_dec_tell_frac(&dec),tell[j+1],seed); ret=EXIT_FAILURE; } } free(tell); free(data); } /*Test compatibility between multiple different encode/decode routines.*/ for(i=0; i<409600; i++) { unsigned *fz; unsigned *ftb; unsigned *data; unsigned *tell; unsigned *enc_method; int j; sz=rand()/((RAND_MAX>>(rand()%9U))+1U); fz=(unsigned *)malloc(sz*sizeof(*fz)); ftb=(unsigned *)malloc(sz*sizeof(*ftb)); data=(unsigned *)malloc(sz*sizeof(*data)); tell=(unsigned *)malloc((sz+1)*sizeof(*tell)); enc_method=(unsigned *)malloc(sz*sizeof(*enc_method)); od_ec_enc_reset(&enc); tell[0]=od_ec_enc_tell_frac(&enc); for(j=0; j<sz; j++) { data[j]=rand()/((RAND_MAX>>1)+1); ftb[j]=(rand()%15)+1; fz[j]=rand()%32766>>15-ftb[j]; fz[j]=OD_MAXI(fz[j],1); enc_method[j]=rand()&1; switch(enc_method[j]) { case 0: { if(rand()&1)od_ec_encode_bool_q15(&enc,data[j],fz[j]<<15-ftb[j]); else od_ec_encode_bool(&enc,data[j],fz[j]<<15-ftb[j],32768); } break; case 1: { ogg_uint16_t cdf[2]; cdf[0]=fz[j]; cdf[1]=1U<<ftb[j]; od_ec_encode_cdf_unscaled_dyadic(&enc,data[j],cdf,2,ftb[j]); } break; } tell[j+1]=od_ec_enc_tell_frac(&enc); } ptr=od_ec_enc_done(&enc,&ptr_sz); if(od_ec_enc_tell(&enc)+7U>>3<ptr_sz) { fprintf(stderr,"od_ec_enc_tell() lied: " "there's %i bytes instead of %i (Random seed: %u).\n", ptr_sz,od_ec_enc_tell(&enc)+7>>3,seed); ret=EXIT_FAILURE; } od_ec_dec_init(&dec,ptr,ptr_sz); if(od_ec_dec_tell_frac(&dec)!=tell[0]) { fprintf(stderr,"od_ec_dec_tell() mismatch between encoder and decoder " "at symbol %i: %u instead of %u (Random seed: %u).\n", 0,(unsigned)od_ec_dec_tell_frac(&dec),tell[0],seed); ret=EXIT_FAILURE; } for(j=0; j<sz; j++) { int dec_method; dec_method=rand()&1; switch(dec_method) { case 0: { if(rand()&1)sym=od_ec_decode_bool_q15(&dec,fz[j]<<15-ftb[j]); else sym=od_ec_decode_bool(&dec,fz[j]<<15-ftb[j],32768); } break; case 1: { ogg_uint16_t cdf[2]; cdf[0]=fz[j]; cdf[1]=1U<<ftb[j]; 
sym=od_ec_decode_cdf_unscaled_dyadic(&dec,cdf,2,ftb[j]); } break; } if(sym!=data[j]) { fprintf(stderr,"Decoded %i instead of %i with fz=%i and ftb=%i " "at position %i of %i (Random seed: %u).\n", sym,data[j],fz[j],ftb[j],j,sz,seed); fprintf(stderr,"Encoding method: %i, decoding method: %i\n", enc_method[j],dec_method); ret=EXIT_FAILURE; } if(od_ec_dec_tell_frac(&dec)!=tell[j+1]) { fprintf(stderr,"od_ec_dec_tell() mismatch between encoder and decoder " "at symbol %i: %u instead of %u (Random seed: %u).\n", j+1,(unsigned)od_ec_dec_tell_frac(&dec),tell[j+1],seed); ret=EXIT_FAILURE; } } free(enc_method); free(tell); free(data); free(ftb); free(fz); } od_ec_enc_reset(&enc); od_ec_encode_bool_q15(&enc,0,16384); od_ec_encode_bool_q15(&enc,0,16384); od_ec_encode_bool_q15(&enc,0,16384); od_ec_encode_bool_q15(&enc,0,16384); od_ec_encode_bool_q15(&enc,0,24576); od_ec_enc_patch_initial_bits(&enc,3,2); if(enc.error) { fprintf(stderr,"od_ec_enc_patch_initial_bits() failed.\n"); ret=EXIT_FAILURE; } od_ec_enc_patch_initial_bits(&enc,0,5); if(!enc.error) { fprintf(stderr, "od_ec_enc_patch_initial_bits() didn't fail when it should have.\n"); ret=EXIT_FAILURE; } od_ec_enc_reset(&enc); od_ec_encode_bool_q15(&enc,0,16384); od_ec_encode_bool_q15(&enc,0,16384); od_ec_encode_bool_q15(&enc,1,32256); od_ec_encode_bool_q15(&enc,0,24576); od_ec_enc_patch_initial_bits(&enc,0,2); if(enc.error) { fprintf(stderr,"od_ec_enc_patch_initial_bits() failed.\n"); ret=EXIT_FAILURE; } ptr=od_ec_enc_done(&enc,&ptr_sz); if(ptr_sz!=2||ptr[0]!=63) { fprintf(stderr, "Got %i when expecting 63 for od_ec_enc_patch_initial_bits().\n",ptr[0]); ret=EXIT_FAILURE; } od_ec_enc_clear(&enc); return ret; }
/** Greedy double-precision PVQ search with a rate term on the final pulses.
 * Most pulses are placed purely to maximize the match with the input; the
 * last 1 + k/4 pulses are placed by a cost that also penalizes later
 * positions. Signs of the input are copied onto the result at the end.
 *
 * @param [in]      xcoeff  input vector to quantize (x in the math doc)
 * @param [in]      n       number of dimensions
 * @param [in]      k       number of pulses
 * @param [out]     ypulse  optimal codevector found (y in the math doc)
 * @param [in]      g2      multiplier for the distortion (typically squared
 *                          gain units)
 * @return                  cosine distance between x and y (between 0 and 1)
 */
static double pvq_search_rdo_double(const od_val16 *xcoeff, int n, int k,
 od_coeff *ypulse, double g2) {
  /* TODO - This blows our 8kB stack space budget and should be fixed when
     converting PVQ to fixed point. */
  double mag[MAXN];
  double energy;
  double dot;
  double norm2;
  double inv_xnorm;
  double rd_lambda;
  double rate_step;
  int n_rdo;
  int placed;
  int d;
  energy = dot = norm2 = 0;
  for (d = 0; d < n; d++) {
    mag[d] = fabs((float)xcoeff[d]);
    energy += mag[d]*mag[d];
  }
  inv_xnorm = 1./sqrt(1e-30 + energy);
  rd_lambda = OD_PVQ_LAMBDA/(1e-30 + g2);
  placed = 0;
  if (k <= 2) {
    /* Too few pulses for the projection to help: start from zero. */
    for (d = 0; d < n; d++) ypulse[d] = 0;
  }
  else {
    /* Start from the input scaled to an L1 norm of k and rounded down, which
       places at most k pulses. */
    double l1;
    double inv_l1;
    l1 = 0;
    for (d = 0; d < n; d++) l1 += mag[d];
    inv_l1 = 1./OD_MAXF(l1, 1e-100);
    for (d = 0; d < n; d++) {
      ypulse[d] = OD_MAXI(0, (int)floor(k*mag[d]*inv_l1));
      dot += mag[d]*ypulse[d];
      norm2 += ypulse[d]*ypulse[d];
      placed += ypulse[d];
    }
  }
  /* Only use RDO on the last few pulses. This not only saves CPU, but using
     RDO on all pulses actually makes the results worse for reasons I don't
     fully understand. */
  n_rdo = 1 + k/4;
  /* Rough assumption for now, the last position costs about 3 bits more than
     the first. */
  rate_step = 3./n;
  /* Distortion-only stage: add one pulse at a time at the position that
     maximizes (x.y)^2/(y.y). */
  while (placed < k - n_rdo) {
    double best_num;
    double best_den;
    int best_pos;
    best_pos = 0;
    best_num = -10;
    best_den = 1;
    for (d = 0; d < n; d++) {
      double num;
      double den;
      num = dot + mag[d];
      den = norm2 + 2*ypulse[d] + 1;
      num *= num;
      /* Compare the ratios by cross-multiplication to avoid a division;
         position 0 is always accepted as the initial candidate. */
      if (d != 0 && !(num*best_den > best_num*den)) continue;
      best_num = num;
      best_den = den;
      best_pos = d;
    }
    dot = dot + mag[best_pos];
    norm2 = norm2 + 2*ypulse[best_pos] + 1;
    ypulse[best_pos]++;
    placed++;
  }
  /* RDO stage for the last pulses. With x and y normalized, the distortion
     is D = (x-y)^2 = x^2 - 2*x*y + y^2 where x^2 and y^2 are constant, so
     we maximize 2*x*y minus a lambda*rate term. Since x and y aren't
     normalized here, x*y is divided by sqrt(x^2)*sqrt(y^2). */
  while (placed < k) {
    double rsqrt_table[4];
    int rsqrt_table_size = 4;
    double best_score;
    int best_pos;
    best_pos = 0;
    best_score = -1e5;
    /*Fill the small rsqrt lookup table with inputs relative to norm2.
      Specifically, the table of n values is filled with
       rsqrt(norm2 + 1), rsqrt(norm2 + 2 + 1) .. rsqrt(norm2 + 2*(n-1) + 1).*/
    od_fill_dynamic_rqrt_table(rsqrt_table, rsqrt_table_size, norm2);
    for (d = 0; d < n; d++) {
      double cand_dot;
      double inv_ynorm;
      double score;
      cand_dot = dot + mag[d];
      /*Calculate rsqrt(norm2 + 2*ypulse[d] + 1) using an optimized method.*/
      inv_ynorm = od_custom_rsqrt_dynamic_table(rsqrt_table,
       rsqrt_table_size, norm2, ypulse[d]);
      score = 2*cand_dot*inv_xnorm*inv_ynorm - rd_lambda*d*rate_step;
      /* Position 0 is always accepted as the initial candidate. */
      if (d != 0 && !(score > best_score)) continue;
      best_score = score;
      best_pos = d;
    }
    dot = dot + mag[best_pos];
    norm2 = norm2 + 2*ypulse[best_pos] + 1;
    ypulse[best_pos]++;
    placed++;
  }
  /* Restore the signs of the input. */
  for (d = 0; d < n; d++) {
    if (xcoeff[d] < 0) ypulse[d] = -ypulse[d];
  }
  return dot/(1e-100 + sqrt(energy*norm2));
}
/** Perform PVQ quantization with prediction, trying several
 * possible gains and angles. See draft-valin-videocodec-pvq and
 * http://jmvalin.ca/slides/pvq.pdf for more details.
 *
 * Three families of candidates are compared by approximate RDO cost
 * (distortion + lambda*rate): the null/skip case, coding relative to the
 * reference r0 (gain + angle theta + pulses), and coding without the
 * reference (gain + pulses). The winner is then synthesized into out.
 *
 * @param [out]    out         coefficients after quantization
 * @param [in]     x0          coefficients before quantization
 * @param [in]     r0          reference, aka predicted coefficients
 * @param [in]     n           number of dimensions
 * @param [in]     q0          quantization step size
 * @param [out]    y           pulse vector (i.e. selected PVQ codevector)
 * @param [out]    itheta      angle between input and reference (-1 if noref)
 * @param [out]    max_theta   maximum value of itheta that could have been
 * @param [out]    vk          total number of pulses
 * @param [in]     beta        per-band activity masking beta param
 * @param [in,out] skip_diff   distortion cost of skipping this block
 *                             (accumulated: skip_dist - best_dist is added
 *                             to the value already stored)
 * @param [in]     robust      make stream robust to error in the reference
 * @param [in]     is_keyframe whether we're encoding a keyframe
 * @param [in]     pli         plane index
 * @param [in]     adapt       probability adaptation context
 * @param [in]     qm          QM with magnitude compensation
 * @param [in]     qm_inv      Inverse of QM with magnitude compensation
 * @return         gain        index of the quantized gain; when a reference
 *                             is used it is passed through neg_interleave()
 *                             together with the reference gain index
*/
static int pvq_theta(od_coeff *out, const od_coeff *x0, const od_coeff *r0,
 int n, int q0, od_coeff *y, int *itheta, int *max_theta, int *vk,
 double beta, double *skip_diff, int robust, int is_keyframe, int pli,
 const od_adapt_ctx *adapt, const int16_t *qm,
 const int16_t *qm_inv) {
  od_val32 g;
  od_val32 gr;
  od_coeff y_tmp[MAXN];
  int i;
  /* Number of pulses. */
  int k;
  /* Companded gain of x and reference, normalized to q. */
  od_val32 cg;
  od_val32 cgr;
  int icgr;
  int qg;
  /* Best RDO cost (D + lambda*R) so far. */
  double best_cost;
  /* Distortion (D) that corresponds to the best RDO cost. */
  double best_dist;
  double dist;
  /* Sign of Householder reflection. */
  int s;
  /* Dimension on which Householder reflects. */
  int m;
  od_val32 theta;
  double corr;
  int best_k;
  od_val32 best_qtheta;
  od_val32 gain_offset;
  int noref;
  double lambda;
  double skip_dist;
  int cfl_enabled;
  int skip;
  double gain_weight;
  od_val16 x16[MAXN];
  od_val16 r16[MAXN];
  int xshift;
  int rshift;
  lambda = OD_PVQ_LAMBDA;
  /* Give more weight to gain error when calculating the total distortion. */
  gain_weight = 1.4;
  OD_ASSERT(n > 1);
  corr = 0;
#if !defined(OD_FLOAT_PVQ)
  /* Shift needed to make x fit in 16 bits even after rotation.
     This shift value is not normative (it can be changed without breaking
     the bitstream) */
  xshift = OD_MAXI(0, od_vector_log_mag(x0, n) - 15);
  /* Shift needed to make the reference fit in 15 bits, so that the Householder
     vector can fit in 16 bits.
     This shift value *is* normative, and has to match the decoder. */
  rshift = OD_MAXI(0, od_vector_log_mag(r0, n) - 14);
#else
  xshift = 0;
  rshift = 0;
#endif
  /* Apply the quantization matrix to input and reference, and accumulate
     their (still unnormalized) inner product in corr. */
  for (i = 0; i < n; i++) {
#if defined(OD_FLOAT_PVQ)
    /*This is slightly different from the original float PVQ code,
       where the qm was applied in the accumulation in od_pvq_compute_gain and
       the vectors were od_coeffs, not od_val16 (i.e. double).*/
    x16[i] = x0[i]*(double)qm[i]*OD_QM_SCALE_1;
    r16[i] = r0[i]*(double)qm[i]*OD_QM_SCALE_1;
#else
    x16[i] = OD_SHR_ROUND(x0[i]*qm[i], OD_QM_SHIFT + xshift);
    r16[i] = OD_SHR_ROUND(r0[i]*qm[i], OD_QM_SHIFT + rshift);
#endif
    corr += OD_MULT16_16(x16[i], r16[i]);
  }
  cfl_enabled = is_keyframe && pli != 0 && !OD_DISABLE_CFL;
  cg = od_pvq_compute_gain(x16, n, q0, &g, beta, xshift);
  cgr = od_pvq_compute_gain(r16, n, q0, &gr, beta, rshift);
  /* With CfL the companded reference gain is forced to OD_CGAIN_SCALE. */
  if (cfl_enabled) cgr = OD_CGAIN_SCALE;
  /* gain_offset is meant to make sure one of the quantized gains has
     exactly the same gain as the reference. */
#if defined(OD_FLOAT_PVQ)
  icgr = (int)floor(.5 + cgr);
#else
  icgr = OD_SHR_ROUND(cgr, OD_CGAIN_SHIFT);
#endif
  gain_offset = cgr - OD_SHL(icgr, OD_CGAIN_SHIFT);
  /* Start search with null case: gain=0, no pulse. */
  qg = 0;
  dist = gain_weight*cg*cg*OD_CGAIN_SCALE_2;
  best_dist = dist;
  best_cost = dist + lambda*od_pvq_rate(0, 0, -1, 0, adapt, NULL, 0,
   n, is_keyframe, pli);
  noref = 1;
  best_k = 0;
  *itheta = -1;
  *max_theta = 0;
  OD_CLEAR(y, n);
  best_qtheta = 0;
  m = 0;
  s = 1;
  /* Normalize the inner product by both gains (undoing the shifts applied
     above) and clamp it to [-1, 1] so that acos() below is well defined. */
  corr = corr/(1e-100 + g*(double)gr/OD_SHL(1, xshift + rshift));
  corr = OD_MAXF(OD_MINF(corr, 1.), -1.);
  /* skip_dist is the distortion used as the skip baseline for skip_diff. */
  if (is_keyframe) skip_dist = gain_weight*cg*cg*OD_CGAIN_SCALE_2;
  else {
    skip_dist = gain_weight*(cg - cgr)*(cg - cgr)
     + cgr*(double)cg*(2 - 2*corr);
    skip_dist *= OD_CGAIN_SCALE_2;
  }
  if (!is_keyframe) {
    /* noref, gain=0 isn't allowed, but skip is allowed. */
    od_val32 scgr;
    scgr = OD_MAXF(0,gain_offset);
    if (icgr == 0) {
      best_dist = gain_weight*(cg - scgr)*(cg - scgr)
       + scgr*(double)cg*(2 - 2*corr);
      best_dist *= OD_CGAIN_SCALE_2;
    }
    best_cost = best_dist + lambda*od_pvq_rate(0, icgr, 0, 0, adapt, NULL,
     0, n, is_keyframe, pli);
    best_qtheta = 0;
    *itheta = 0;
    *max_theta = 0;
    noref = 0;
  }
  /* Candidates that use the reference. Skipped when the band is too large,
     the reference is all zero, or the correlation is not positive. */
  if (n <= OD_MAX_PVQ_SIZE && !od_vector_is_null(r0, n) && corr > 0) {
    od_val16 xr[MAXN];
    int gain_bound;
    gain_bound = OD_SHR(cg - gain_offset, OD_CGAIN_SHIFT);
    /* Perform theta search only if prediction is useful. */
    theta = OD_ROUND32(OD_THETA_SCALE*acos(corr));
    m = od_compute_householder(r16, n, gr, &s, rshift);
    od_apply_householder(xr, x16, r16, n);
    /* Drop dimension m from the reflected vector; the PVQ search below
       works on the remaining n - 1 entries. */
    for (i = m; i < n - 1; i++) xr[i] = xr[i + 1];
    /* Search for the best gain within a reasonable range. */
    for (i = OD_MAXI(1, gain_bound - 1); i <= gain_bound + 1; i++) {
      int j;
      od_val32 qcg;
      int ts;
      /* Quantized companded gain */
      qcg = OD_SHL(i, OD_CGAIN_SHIFT) + gain_offset;
      /* Set angular resolution (in ra) to match the encoded gain */
      ts = od_pvq_compute_max_theta(qcg, beta);
      /* Search for the best angle within a reasonable range. */
      for (j = OD_MAXI(0, (int)floor(.5 + theta*OD_THETA_SCALE_1*2/M_PI*ts)
       - 2); j <= OD_MINI(ts - 1, (int)ceil(theta*OD_THETA_SCALE_1*2/M_PI*ts));
       j++) {
        double cos_dist;
        double cost;
        double dist_theta;
        double sin_prod;
        od_val32 qtheta;
        qtheta = od_pvq_compute_theta(j, ts);
        k = od_pvq_compute_k(qcg, j, qtheta, 0, n, beta,
         robust || is_keyframe);
        sin_prod = od_pvq_sin(theta)*OD_TRIG_SCALE_1*od_pvq_sin(qtheta)*
         OD_TRIG_SCALE_1;
        /* PVQ search, using a gain of qcg*cg*sin(theta)*sin(qtheta) since
           that's the factor by which cos_dist is multiplied to get the
           distortion metric. */
        cos_dist = pvq_search_rdo_double(xr, n - 1, k, y_tmp,
         qcg*(double)cg*sin_prod*OD_CGAIN_SCALE_2);
        /* See Jmspeex' Journal of Dubious Theoretical Results. */
        dist_theta = 2 - 2.*od_pvq_cos(theta - qtheta)*OD_TRIG_SCALE_1
         + sin_prod*(2 - 2*cos_dist);
        dist = gain_weight*(qcg - cg)*(qcg - cg) + qcg*(double)cg*dist_theta;
        dist *= OD_CGAIN_SCALE_2;
        /* Do approximate RDO. */
        cost = dist + lambda*od_pvq_rate(i, icgr, j, ts, adapt, y_tmp, k, n,
         is_keyframe, pli);
        /* Strict comparison here; the no-reference search below uses <=, so
           on an exact tie the no-reference candidate is preferred. */
        if (cost < best_cost) {
          best_cost = cost;
          best_dist = dist;
          qg = i;
          best_k = k;
          best_qtheta = qtheta;
          *itheta = j;
          *max_theta = ts;
          noref = 0;
          OD_COPY(y, y_tmp, n - 1);
        }
      }
    }
  }
  /* Don't bother with no-reference version if there's a reasonable
     correlation. The only exception is luma on a keyframe because
     H/V prediction is unreliable. */
  if (n <= OD_MAX_PVQ_SIZE &&
   ((is_keyframe && pli == 0) || corr < .5
   || cg < (od_val32)(OD_SHL(2, OD_CGAIN_SHIFT)))) {
    int gain_bound;
    gain_bound = OD_SHR(cg, OD_CGAIN_SHIFT);
    /* Search for the best gain (haven't determined reasonable range yet). */
    for (i = OD_MAXI(1, gain_bound); i <= gain_bound + 1; i++) {
      double cos_dist;
      double cost;
      od_val32 qcg;
      qcg = OD_SHL(i, OD_CGAIN_SHIFT);
      k = od_pvq_compute_k(qcg, -1, -1, 1, n, beta, robust || is_keyframe);
      /* Without a reference the full n-dimensional vector is searched. */
      cos_dist = pvq_search_rdo_double(x16, n, k, y_tmp,
       qcg*(double)cg*OD_CGAIN_SCALE_2);
      /* See Jmspeex' Journal of Dubious Theoretical Results. */
      dist = gain_weight*(qcg - cg)*(qcg - cg)
       + qcg*(double)cg*(2 - 2*cos_dist);
      dist *= OD_CGAIN_SCALE_2;
      /* Do approximate RDO. */
      cost = dist + lambda*od_pvq_rate(i, 0, -1, 0, adapt, y_tmp, k, n,
       is_keyframe, pli);
      if (cost <= best_cost) {
        best_cost = cost;
        best_dist = dist;
        qg = i;
        noref = 1;
        best_k = k;
        *itheta = -1;
        *max_theta = 0;
        OD_COPY(y, y_tmp, n);
      }
    }
  }
  k = best_k;
  theta = best_qtheta;
  /* Decide whether the chosen candidate amounts to a skip: either an
     all-zero output or a plain copy of the reference. */
  skip = 0;
  if (noref) {
    if (qg == 0) skip = OD_PVQ_SKIP_ZERO;
  }
  else {
    if (!is_keyframe && qg == 0) {
      skip = (icgr ? OD_PVQ_SKIP_ZERO : OD_PVQ_SKIP_COPY);
    }
    if (qg == icgr && *itheta == 0 && !cfl_enabled) skip = OD_PVQ_SKIP_COPY;
  }
  /* Synthesize like the decoder would. */
  if (skip) {
    if (skip == OD_PVQ_SKIP_COPY) OD_COPY(out, r0, n);
    else OD_CLEAR(out, n);
  }
  else {
    if (noref) gain_offset = 0;
    g = od_gain_expand(OD_SHL(qg, OD_CGAIN_SHIFT) + gain_offset, q0, beta);
    od_pvq_synthesis_partial(out, y, r16, n, noref, g, theta, m, s,
     qm_inv);
  }
  *vk = k;
  *skip_diff += skip_dist - best_dist;
  /* Encode gain differently depending on whether we use prediction or not.
     Special encoding on inter frames where qg=0 is allowed for noref=0
     but not noref=1.*/
  if (is_keyframe) return noref ? qg : neg_interleave(qg, icgr);
  else return noref ? qg - 1 : neg_interleave(qg + 1, icgr + 1);
}