/*Builds the list of candidate motion vectors for a single macro block.
  On return _mcenc->candidates[] is laid out as follows:
   slot 0: the component-wise median of the first three set A vectors,
   slots 1.._mcenc->setb0-1: set A,
   slots _mcenc->setb0.._mcenc->ncandidates-1: set B.
  Every stored component is passed through OC_DIV2() before returning.
  _enc:   The encoder context; only its mb_info table is read.
  _mcenc: Receives candidates[], setb0 and ncandidates.
  _accum: Offset added to the predictors derived from analysis_mv[1] and
           analysis_mv[2]; it is also used as a candidate on its own.
  _mbi:   The index of the macro block to collect candidates for.
  _frame: Selects which per-frame entry of analysis_mv[] is read.*/
static void oc_mcenc_find_candidates(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
 oc_mv _accum,int _mbi,int _frame){
  oc_mb_enc_info *mbs;
  oc_mb_enc_info *mb;
  int             med[3][2];
  int             off_x;
  int             off_y;
  int             accel_x;
  int             accel_y;
  int             n;
  int             k;
  mbs=_enc->mb_info;
  mb=mbs+_mbi;
  off_x=OC_MV_X(_accum);
  off_y=OC_MV_Y(_accum);
  /*Slot 0 is reserved for the median predictor, so set A starts at slot 1.*/
  n=1;
  /*Set A, part 1: the newest analysis vector of every cneighbors[] entry.*/
  for(k=0;k<mb->ncneighbors;k++){
    unsigned neighbor;
    neighbor=mb->cneighbors[k];
    _mcenc->candidates[n][0]=OC_MV_X(mbs[neighbor].analysis_mv[0][_frame]);
    _mcenc->candidates[n][1]=OC_MV_Y(mbs[neighbor].analysis_mv[0][_frame]);
    n++;
  }
  /*Set A, part 2: the accumulator by itself, ...*/
  _mcenc->candidates[n][0]=OC_CLAMPI(-31,off_x,31);
  _mcenc->candidates[n][1]=OC_CLAMPI(-31,off_y,31);
  n++;
  /*... the previous vector of this macro block shifted by the accumulator,
     ...*/
  _mcenc->candidates[n][0]=OC_CLAMPI(-31,
   OC_MV_X(mb->analysis_mv[1][_frame])+off_x,31);
  _mcenc->candidates[n][1]=OC_CLAMPI(-31,
   OC_MV_Y(mb->analysis_mv[1][_frame])+off_y,31);
  n++;
  /*... and the (0,0) vector.*/
  _mcenc->candidates[n][0]=0;
  _mcenc->candidates[n][1]=0;
  n++;
  /*The best predictor is the median of slots 1..3, taken independently for
     each component with a three-element sorting network.
    At least three set A vectors always exist, even with no neighbors.*/
  memcpy(med,_mcenc->candidates+1,sizeof(med));
  for(k=0;k<2;k++){
    OC_SORT2I(med[0][k],med[1][k]);
    OC_SORT2I(med[1][k],med[2][k]);
    OC_SORT2I(med[0][k],med[1][k]);
    _mcenc->candidates[0][k]=med[1][k];
  }
  /*Set B: the accelerated predictor 2*mv[1]-mv[2] of this macro block,
     shifted by the accumulator and clamped to [-31,31].*/
  _mcenc->setb0=n;
  accel_x=OC_CLAMPI(-31,
   2*OC_MV_X(mb->analysis_mv[1][_frame])
   -OC_MV_X(mb->analysis_mv[2][_frame])+off_x,31);
  accel_y=OC_CLAMPI(-31,
   2*OC_MV_Y(mb->analysis_mv[1][_frame])
   -OC_MV_Y(mb->analysis_mv[2][_frame])+off_y,31);
  /*The vector is stored once, plus once more per pneighbors[] entry.
    NOTE(review): npneighbors only controls how many copies are written; the
     pneighbors[] entries themselves are never used to look up another macro
     block, so all set B slots are identical.
    Confirm whether per-neighbor predictors were intended here.*/
  k=0;
  do{
    _mcenc->candidates[n][0]=accel_x;
    _mcenc->candidates[n][1]=accel_y;
    n++;
  }
  while(k++<mb->npneighbors);
  /*Truncate to full-pel positions.*/
  for(k=0;k<n;k++){
    _mcenc->candidates[k][0]=OC_DIV2(_mcenc->candidates[k][0]);
    _mcenc->candidates[k][1]=OC_DIV2(_mcenc->candidates[k][1]);
  }
  _mcenc->ncandidates=n;
}
/*Perform a motion vector search for this macro block against a single reference frame. As a bonus, individual block motion vectors are computed as well, as much of the work can be shared. The actual motion vector is stored in the appropriate place in the oc_mb_enc_info structure. _accum: Drop frame/golden MV accumulators. _mbi: The macro block index. _frame: The frame to use for SATD calculations and refinement, either OC_FRAME_PREV or OC_FRAME_GOLD. _frame_full: The frame to perform the 1px search on, one of OC_FRAME_PREV, OC_FRAME_GOLD, OC_FRAME_PREV_ORIG, or OC_FRAME_GOLD_ORIG.*/ void oc_mcenc_search_frame(oc_enc_ctx *_enc,oc_mv _accum,int _mbi,int _frame, int _frame_full){ /*Note: Traditionally this search is done using a rate-distortion objective function of the form D+lambda*R. However, xiphmont tested this and found it produced a small degradation, while requiring extra computation. This is most likely due to Theora's peculiar MV encoding scheme: MVs are not coded relative to a predictor, and the only truly cheap way to use a MV is in the LAST or LAST2 MB modes, which are not being considered here. Therefore if we use the MV found here, it's only because both LAST and LAST2 performed poorly, and therefore the MB is not likely to be uniform or suffer from the aperture problem. Furthermore we would like to re-use the MV found here for as many MBs as possible, so picking a slightly sub-optimal vector to save a bit or two may cause increased degradation in many blocks to come. 
We could artificially reduce lambda to compensate, but it's faster to just disable it entirely, and use D (the distortion) as the sole criterion.*/ oc_mcenc_ctx mcenc; const ptrdiff_t *frag_buf_offs; const ptrdiff_t *fragis; const unsigned char *src; const unsigned char *ref; const unsigned char *satd_ref; int ystride; oc_mb_enc_info *embs; ogg_int32_t hit_cache[31]; ogg_int32_t hitbit; unsigned best_block_err[4]; unsigned block_err[4]; unsigned best_err; int best_vec[2]; int best_block_vec[4][2]; int candx; int candy; int bi; embs=_enc->mb_info; /*Find some candidate motion vectors.*/ oc_mcenc_find_candidates_a(_enc,&mcenc,_accum,_mbi,_frame); /*Clear the cache of locations we've examined.*/ memset(hit_cache,0,sizeof(hit_cache)); /*Start with the median predictor.*/ candx=OC_DIV2(mcenc.candidates[0][0]); candy=OC_DIV2(mcenc.candidates[0][1]); hit_cache[candy+15]|=(ogg_int32_t)1<<candx+15; frag_buf_offs=_enc->state.frag_buf_offs; fragis=_enc->state.mb_maps[_mbi][0]; src=_enc->state.ref_frame_data[OC_FRAME_IO]; ref=_enc->state.ref_frame_data[_frame_full]; satd_ref=_enc->state.ref_frame_data[_frame]; ystride=_enc->state.ref_ystride[0]; /*TODO: customize error function for speed/(quality+size) tradeoff.*/ best_err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc, frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err); best_vec[0]=candx; best_vec[1]=candy; if(_frame==OC_FRAME_PREV){ for(bi=0;bi<4;bi++){ best_block_err[bi]=block_err[bi]; best_block_vec[bi][0]=candx; best_block_vec[bi][1]=candy; } } /*If this predictor fails, move on to set A.*/ if(best_err>OC_YSAD_THRESH1){ unsigned err; unsigned t2; int ncs; int ci; /*Compute the early termination threshold for set A.*/ t2=embs[_mbi].error[_frame]; ncs=OC_MINI(3,embs[_mbi].ncneighbors); for(ci=0;ci<ncs;ci++){ t2=OC_MAXI(t2,embs[embs[_mbi].cneighbors[ci]].error[_frame]); } t2+=(t2>>OC_YSAD_THRESH2_SCALE_BITS)+OC_YSAD_THRESH2_OFFSET; /*Examine the candidates in set A.*/ for(ci=1;ci<mcenc.setb0;ci++){ 
candx=OC_DIV2(mcenc.candidates[ci][0]); candy=OC_DIV2(mcenc.candidates[ci][1]); /*If we've already examined this vector, then we would be using it if it was better than what we are using.*/ hitbit=(ogg_int32_t)1<<candx+15; if(hit_cache[candy+15]&hitbit)continue; hit_cache[candy+15]|=hitbit; err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc, frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err); if(err<best_err){ best_err=err; best_vec[0]=candx; best_vec[1]=candy; } if(_frame==OC_FRAME_PREV){ for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){ best_block_err[bi]=block_err[bi]; best_block_vec[bi][0]=candx; best_block_vec[bi][1]=candy; } } } if(best_err>t2){ oc_mcenc_find_candidates_b(_enc,&mcenc,_accum,_mbi,_frame); /*Examine the candidates in set B.*/ for(;ci<mcenc.ncandidates;ci++){ candx=OC_DIV2(mcenc.candidates[ci][0]); candy=OC_DIV2(mcenc.candidates[ci][1]); hitbit=(ogg_int32_t)1<<candx+15; if(hit_cache[candy+15]&hitbit)continue; hit_cache[candy+15]|=hitbit; err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc, frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err); if(err<best_err){ best_err=err; best_vec[0]=candx; best_vec[1]=candy; } if(_frame==OC_FRAME_PREV){ for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){ best_block_err[bi]=block_err[bi]; best_block_vec[bi][0]=candx; best_block_vec[bi][1]=candy; } } } /*Use the same threshold for set B as in set A.*/ if(best_err>t2){ int best_site; int nsites; int sitei; int site; int b; /*Square pattern search.*/ for(;;){ best_site=4; /*Compose the bit flags for boundary conditions.*/ b=OC_DIV16(-best_vec[0]+1)|OC_DIV16(best_vec[0]+1)<<1| OC_DIV16(-best_vec[1]+1)<<2|OC_DIV16(best_vec[1]+1)<<3; nsites=OC_SQUARE_NSITES[b]; for(sitei=0;sitei<nsites;sitei++){ site=OC_SQUARE_SITES[b][sitei]; candx=best_vec[0]+OC_SQUARE_DX[site]; candy=best_vec[1]+OC_SQUARE_DY[site]; hitbit=(ogg_int32_t)1<<candx+15; if(hit_cache[candy+15]&hitbit)continue; hit_cache[candy+15]|=hitbit; 
err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc, frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err); if(err<best_err){ best_err=err; best_site=site; } if(_frame==OC_FRAME_PREV){ for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){ best_block_err[bi]=block_err[bi]; best_block_vec[bi][0]=candx; best_block_vec[bi][1]=candy; } } } if(best_site==4)break; best_vec[0]+=OC_SQUARE_DX[best_site]; best_vec[1]+=OC_SQUARE_DY[best_site]; } /*Final 4-MV search.*/ /*Simply use 1/4 of the macro block set A and B threshold as the individual block threshold.*/ if(_frame==OC_FRAME_PREV){ t2>>=2; for(bi=0;bi<4;bi++){ if(best_block_err[bi]>t2){ /*Square pattern search. We do this in a slightly interesting manner. We continue to check the SAD of all four blocks in the macro block. This gives us two things: 1) We can continue to use the hit_cache to avoid duplicate checks. Otherwise we could continue to read it, but not write to it without saving and restoring it for each block. Note that we could still eliminate a large number of duplicate checks by taking into account the site we came from when choosing the site list. We can still do that to avoid extra hit_cache queries, and it might even be a speed win. 2) It gives us a slightly better chance of escaping local minima. We would not be here if we weren't doing a fairly bad job in finding a good vector, and checking these vectors can save us from 100 to several thousand points off our SAD 1 in 15 times. TODO: Is this a good idea? Who knows. 
It needs more testing.*/ for(;;){ int bestx; int besty; int bj; bestx=best_block_vec[bi][0]; besty=best_block_vec[bi][1]; /*Compose the bit flags for boundary conditions.*/ b=OC_DIV16(-bestx+1)|OC_DIV16(bestx+1)<<1| OC_DIV16(-besty+1)<<2|OC_DIV16(besty+1)<<3; nsites=OC_SQUARE_NSITES[b]; for(sitei=0;sitei<nsites;sitei++){ site=OC_SQUARE_SITES[b][sitei]; candx=bestx+OC_SQUARE_DX[site]; candy=besty+OC_SQUARE_DY[site]; hitbit=(ogg_int32_t)1<<candx+15; if(hit_cache[candy+15]&hitbit)continue; hit_cache[candy+15]|=hitbit; err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc, frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err); if(err<best_err){ best_err=err; best_vec[0]=candx; best_vec[1]=candy; } for(bj=0;bj<4;bj++)if(block_err[bj]<best_block_err[bj]){ best_block_err[bj]=block_err[bj]; best_block_vec[bj][0]=candx; best_block_vec[bj][1]=candy; } } if(best_block_vec[bi][0]==bestx&&best_block_vec[bi][1]==besty){ break; } } } } }