static int mapping0_forward(vorbis_block *vb){ vorbis_dsp_state *vd=vb->vd; vorbis_info *vi=vd->vi; codec_setup_info *ci=vi->codec_setup; private_state *b=vb->vd->backend_state; vorbis_block_internal *vbi=(vorbis_block_internal *)vb->internal; int n=vb->pcmend; int i,j,k; int *nonzero = alloca(sizeof(*nonzero)*vi->channels); float **gmdct = _vorbis_block_alloc(vb,vi->channels*sizeof(*gmdct)); int **ilogmaskch= _vorbis_block_alloc(vb,vi->channels*sizeof(*ilogmaskch)); int ***floor_posts = _vorbis_block_alloc(vb,vi->channels*sizeof(*floor_posts)); float global_ampmax=vbi->ampmax; float *local_ampmax=alloca(sizeof(*local_ampmax)*vi->channels); int blocktype=vbi->blocktype; int modenumber=vb->W; vorbis_info_mapping0 *info=ci->map_param[modenumber]; vorbis_look_psy *psy_look= b->psy+blocktype+(vb->W?2:0); vb->mode=modenumber; for(i=0;i<vi->channels;i++){ float scale=4.f/n; float scale_dB; float *pcm =vb->pcm[i]; float *logfft =pcm; gmdct[i]=_vorbis_block_alloc(vb,n/2*sizeof(**gmdct)); scale_dB=todB(&scale) + .345; /* + .345 is a hack; the original todB estimation used on IEEE 754 compliant machines had a bug that returned dB values about a third of a decibel too high. The bug was harmless because tunings implicitly took that into account. However, fixing the bug in the estimator requires changing all the tunings as well. For now, it's easier to sync things back up here, and recalibrate the tunings in the next major model upgrade. */ #if 0 if(vi->channels==2) if(i==0) _analysis_output("pcmL",seq,pcm,n,0,0,total-n/2); else _analysis_output("pcmR",seq,pcm,n,0,0,total-n/2); #endif /* window the PCM data */ _vorbis_apply_window(pcm,b->window,ci->blocksizes,vb->lW,vb->W,vb->nW); #if 0 if(vi->channels==2) if(i==0) _analysis_output("windowedL",seq,pcm,n,0,0,total-n/2); else _analysis_output("windowedR",seq,pcm,n,0,0,total-n/2); #endif /* transform the PCM data */ /* only MDCT right now.... */ mdct_forward(b->transform[vb->W][0],pcm,gmdct[i]); /* FFT yields more accurate tonal estimation (not phase sensitive) */ drft_forward(&b->fft_look[vb->W],pcm); logfft[0]=scale_dB+todB(pcm) + .345; /* + .345 is a hack; the original todB estimation used on IEEE 754 compliant machines had a bug that returned dB values about a third of a decibel too high. The bug was harmless because tunings implicitly took that into account. However, fixing the bug in the estimator requires changing all the tunings as well. For now, it's easier to sync things back up here, and recalibrate the tunings in the next major model upgrade. */ local_ampmax[i]=logfft[0]; for(j=1;j<n-1;j+=2){ float temp=pcm[j]*pcm[j]+pcm[j+1]*pcm[j+1]; temp=logfft[(j+1)>>1]=scale_dB+.5f*todB(&temp) + .345; /* + .345 is a hack; the original todB estimation used on IEEE 754 compliant machines had a bug that returned dB values about a third of a decibel too high. The bug was harmless because tunings implicitly took that into account. However, fixing the bug in the estimator requires changing all the tunings as well. For now, it's easier to sync things back up here, and recalibrate the tunings in the next major model upgrade. */ if(temp>local_ampmax[i])local_ampmax[i]=temp; } if(local_ampmax[i]>0.f)local_ampmax[i]=0.f; if(local_ampmax[i]>global_ampmax)global_ampmax=local_ampmax[i]; #if 0 if(vi->channels==2){ if(i==0){ _analysis_output("fftL",seq,logfft,n/2,1,0,0); }else{ _analysis_output("fftR",seq,logfft,n/2,1,0,0); } } #endif } { float *noise = _vorbis_block_alloc(vb,n/2*sizeof(*noise)); float *tone = _vorbis_block_alloc(vb,n/2*sizeof(*tone)); for(i=0;i<vi->channels;i++){ /* the encoder setup assumes that all the modes used by any specific bitrate tweaking use the same floor */ int submap=info->chmuxlist[i]; /* the following makes things clearer to *me* anyway */ float *mdct =gmdct[i]; float *logfft =vb->pcm[i]; float *logmdct =logfft+n/2; float *logmask =logfft; vb->mode=modenumber; floor_posts[i]=_vorbis_block_alloc(vb,PACKETBLOBS*sizeof(**floor_posts)); memset(floor_posts[i],0,sizeof(**floor_posts)*PACKETBLOBS); for(j=0;j<n/2;j++) logmdct[j]=todB(mdct+j) + .345; /* + .345 is a hack; the original todB estimation used on IEEE 754 compliant machines had a bug that returned dB values about a third of a decibel too high. The bug was harmless because tunings implicitly took that into account. However, fixing the bug in the estimator requires changing all the tunings as well. For now, it's easier to sync things back up here, and recalibrate the tunings in the next major model upgrade. */ #if 0 if(vi->channels==2){ if(i==0) _analysis_output("mdctL",seq,logmdct,n/2,1,0,0); else _analysis_output("mdctR",seq,logmdct,n/2,1,0,0); }else{ _analysis_output("mdct",seq,logmdct,n/2,1,0,0); } #endif /* first step; noise masking. Not only does 'noise masking' give us curves from which we can decide how much resolution to give noise parts of the spectrum, it also implicitly hands us a tonality estimate (the larger the value in the 'noise_depth' vector, the more tonal that area is) */ _vp_noisemask(psy_look, logmdct, noise); /* noise does not have by-frequency offset bias applied yet */ #if 0 if(vi->channels==2){ if(i==0) _analysis_output("noiseL",seq,noise,n/2,1,0,0); else _analysis_output("noiseR",seq,noise,n/2,1,0,0); } #endif /* second step: 'all the other crap'; all the stuff that isn't computed/fit for bitrate management goes in the second psy vector. This includes tone masking, peak limiting and ATH */ _vp_tonemask(psy_look, logfft, tone, global_ampmax, local_ampmax[i]); #if 0 if(vi->channels==2){ if(i==0) _analysis_output("toneL",seq,tone,n/2,1,0,0); else _analysis_output("toneR",seq,tone,n/2,1,0,0); } #endif /* third step; we offset the noise vectors, overlay tone masking. We then do a floor1-specific line fit. If we're performing bitrate management, the line fit is performed multiple times for up/down tweakage on demand. */ #if 0 { float aotuv[psy_look->n]; #endif _vp_offset_and_mix(psy_look, noise, tone, 1, logmask, mdct, logmdct); #if 0 if(vi->channels==2){ if(i==0) _analysis_output("aotuvM1_L",seq,aotuv,psy_look->n,1,1,0); else _analysis_output("aotuvM1_R",seq,aotuv,psy_look->n,1,1,0); } } #endif #if 0 if(vi->channels==2){ if(i==0) _analysis_output("mask1L",seq,logmask,n/2,1,0,0); else _analysis_output("mask1R",seq,logmask,n/2,1,0,0); } #endif /* this algorithm is hardwired to floor 1 for now; abort out if we're *not* floor1. This won't happen unless someone has broken the encode setup lib. Guard it anyway. */ if(ci->floor_type[info->floorsubmap[submap]]!=1)return(-1); floor_posts[i][PACKETBLOBS/2]= floor1_fit(vb,b->flr[info->floorsubmap[submap]], logmdct, logmask); /* are we managing bitrate? If so, perform two more fits for later rate tweaking (fits represent hi/lo) */ if(vorbis_bitrate_managed(vb) && floor_posts[i][PACKETBLOBS/2]){ /* higher rate by way of lower noise curve */ _vp_offset_and_mix(psy_look, noise, tone, 2, logmask, mdct, logmdct); #if 0 if(vi->channels==2){ if(i==0) _analysis_output("mask2L",seq,logmask,n/2,1,0,0); else _analysis_output("mask2R",seq,logmask,n/2,1,0,0); } #endif floor_posts[i][PACKETBLOBS-1]= floor1_fit(vb,b->flr[info->floorsubmap[submap]], logmdct, logmask); /* lower rate by way of higher noise curve */ _vp_offset_and_mix(psy_look, noise, tone, 0, logmask, mdct, logmdct); #if 0 if(vi->channels==2) if(i==0) _analysis_output("mask0L",seq,logmask,n/2,1,0,0); else _analysis_output("mask0R",seq,logmask,n/2,1,0,0); #endif floor_posts[i][0]= floor1_fit(vb,b->flr[info->floorsubmap[submap]], logmdct, logmask); /* we also interpolate a range of intermediate curves for intermediate rates */ for(k=1;k<PACKETBLOBS/2;k++) floor_posts[i][k]= floor1_interpolate_fit(vb,b->flr[info->floorsubmap[submap]], floor_posts[i][0], floor_posts[i][PACKETBLOBS/2], k*65536/(PACKETBLOBS/2)); for(k=PACKETBLOBS/2+1;k<PACKETBLOBS-1;k++) floor_posts[i][k]= floor1_interpolate_fit(vb,b->flr[info->floorsubmap[submap]], floor_posts[i][PACKETBLOBS/2], floor_posts[i][PACKETBLOBS-1], (k-PACKETBLOBS/2)*65536/(PACKETBLOBS/2)); } } } vbi->ampmax=global_ampmax; /* the next phases are performed once for vbr-only and PACKETBLOB times for bitrate managed modes. 1) encode actual mode being used 2) encode the floor for each channel, compute coded mask curve/res 3) normalize and couple. 4) encode residue 5) save packet bytes to the packetblob vector */ /* iterate over the many masking curve fits we've created */ { float **res_bundle=alloca(sizeof(*res_bundle)*vi->channels); float **couple_bundle=alloca(sizeof(*couple_bundle)*vi->channels); int *zerobundle=alloca(sizeof(*zerobundle)*vi->channels); int **sortindex=alloca(sizeof(*sortindex)*vi->channels); float **mag_memo; int **mag_sort; if(info->coupling_steps){ mag_memo=_vp_quantize_couple_memo(vb, &ci->psy_g_param, psy_look, info, gmdct); mag_sort=_vp_quantize_couple_sort(vb, psy_look, info, mag_memo); hf_reduction(&ci->psy_g_param, psy_look, info, mag_memo); } memset(sortindex,0,sizeof(*sortindex)*vi->channels); if(psy_look->vi->normal_channel_p){ for(i=0;i<vi->channels;i++){ float *mdct =gmdct[i]; sortindex[i]=alloca(sizeof(**sortindex)*n/2); _vp_noise_normalize_sort(psy_look,mdct,sortindex[i]); } } for(k=(vorbis_bitrate_managed(vb)?0:PACKETBLOBS/2); k<=(vorbis_bitrate_managed(vb)?PACKETBLOBS-1:PACKETBLOBS/2); k++){ oggpack_buffer *opb=vbi->packetblob[k]; /* start out our new packet blob with packet type and mode */ /* Encode the packet type */ oggpack_write(opb,0,1); /* Encode the modenumber */ /* Encode frame mode, pre,post windowsize, then dispatch */ oggpack_write(opb,modenumber,b->modebits); if(vb->W){ oggpack_write(opb,vb->lW,1); oggpack_write(opb,vb->nW,1); } /* encode floor, compute masking curve, sep out residue */ for(i=0;i<vi->channels;i++){ int submap=info->chmuxlist[i]; float *mdct =gmdct[i]; float *res =vb->pcm[i]; int *ilogmask=ilogmaskch[i]= _vorbis_block_alloc(vb,n/2*sizeof(**gmdct)); nonzero[i]=floor1_encode(opb,vb,b->flr[info->floorsubmap[submap]], floor_posts[i][k], ilogmask); #if 0 { char buf[80]; sprintf(buf,"maskI%c%d",i?'R':'L',k); float work[n/2]; for(j=0;j<n/2;j++) work[j]=FLOOR1_fromdB_LOOKUP[ilogmask[j]]; _analysis_output(buf,seq,work,n/2,1,1,0); } #endif _vp_remove_floor(psy_look, mdct, ilogmask, res, ci->psy_g_param.sliding_lowpass[vb->W][k]); _vp_noise_normalize(psy_look,res,res+n/2,sortindex[i]); #if 0 { char buf[80]; float work[n/2]; for(j=0;j<n/2;j++) work[j]=FLOOR1_fromdB_LOOKUP[ilogmask[j]]*(res+n/2)[j]; sprintf(buf,"resI%c%d",i?'R':'L',k); _analysis_output(buf,seq,work,n/2,1,1,0); } #endif } /* our iteration is now based on masking curve, not prequant and coupling. Only one prequant/coupling step */ /* quantize/couple */ /* incomplete implementation that assumes the tree is all depth one, or no tree at all */ if(info->coupling_steps){ _vp_couple(k, &ci->psy_g_param, psy_look, info, vb->pcm, mag_memo, mag_sort, ilogmaskch, nonzero, ci->psy_g_param.sliding_lowpass[vb->W][k]); } /* classify and encode by submap */ for(i=0;i<info->submaps;i++){ int ch_in_bundle=0; long **classifications; int resnum=info->residuesubmap[i]; for(j=0;j<vi->channels;j++){ if(info->chmuxlist[j]==i){ zerobundle[ch_in_bundle]=0; if(nonzero[j])zerobundle[ch_in_bundle]=1; res_bundle[ch_in_bundle]=vb->pcm[j]; couple_bundle[ch_in_bundle++]=vb->pcm[j]+n/2; } } classifications=_residue_P[ci->residue_type[resnum]]-> class(vb,b->residue[resnum],couple_bundle,zerobundle,ch_in_bundle); _residue_P[ci->residue_type[resnum]]-> forward(opb,vb,b->residue[resnum], couple_bundle,NULL,zerobundle,ch_in_bundle,classifications); } /* ok, done encoding. Next protopacket. */ } } #if 0 seq++; total+=ci->blocksizes[vb->W]/4+ci->blocksizes[vb->nW]/4; #endif return(0); }
int main(int argc,char *argv[]){ int eos=0; float nonz=0.f; float acc=0.f; float tot=0.f; float ampmax=-9999,newmax; float local_ampmax[2]; int framesize=2048; float ampmax_att_per_sec=-6.; float *pcm[2],*out[2],*window,*flr[2],*mask[2],*work[2]; signed char *buffer,*buffer2; mdct_lookup m_look; drft_lookup f_look; vorbis_look_psy p_look; vorbis_look_psy_global *pg_look; vorbis_look_floor *floor_look; vorbis_info vi; long i,j,k; int ath=0; int decayp=0; argv++; while(*argv){ if(*argv[0]=='-'){ /* option */ if(argv[0][1]=='v'){ noisy=0; } }else if(*argv[0]=='+'){ /* option */ if(argv[0][1]=='v'){ noisy=1; } }else framesize=atoi(argv[0]); argv++; } vi.channels=2; vi.codec_setup=&codec_setup0; pcm[0]=_ogg_malloc(framesize*sizeof(float)); pcm[1]=_ogg_malloc(framesize*sizeof(float)); out[0]=_ogg_calloc(framesize/2,sizeof(float)); out[1]=_ogg_calloc(framesize/2,sizeof(float)); work[0]=_ogg_calloc(framesize,sizeof(float)); work[1]=_ogg_calloc(framesize,sizeof(float)); flr[0]=_ogg_calloc(framesize/2,sizeof(float)); flr[1]=_ogg_calloc(framesize/2,sizeof(float)); buffer=_ogg_malloc(framesize*4); buffer2=buffer+framesize*2; window=_vorbis_window(0,framesize,framesize/2,framesize/2); mdct_init(&m_look,framesize); drft_init(&f_look,framesize); _vp_psy_init(&p_look,&_psy_set0,&_psy_set0G,framesize/2,44100); pg_look=_vp_global_look(&vi); floor_look=_floor_P[1]->look(NULL,NULL,&_floor_set0); /* we cheat on the WAV header; we just bypass 44 bytes and never verify that it matches 16bit/stereo/44.1kHz. */ fread(buffer,1,44,stdin); fwrite(buffer,1,44,stdout); memset(buffer,0,framesize*2); analysis("window",0,window,framesize,0,0); fprintf(stderr,"Processing for frame size %d...\n",framesize); while(!eos){ long bytes=fread(buffer2,1,framesize*2,stdin); if(bytes<framesize*2) memset(buffer2+bytes,0,framesize*2-bytes); if(bytes!=0){ int nonzero[2]; /* uninterleave samples */ for(i=0;i<framesize;i++){ pcm[0][i]=((buffer[i*4+1]<<8)| (0x00ff&(int)buffer[i*4]))/32768.f; pcm[1][i]=((buffer[i*4+3]<<8)| (0x00ff&(int)buffer[i*4+2]))/32768.f; } { float secs=framesize/44100.; ampmax+=secs*ampmax_att_per_sec; if(ampmax<-9999)ampmax=-9999; } for(i=0;i<2;i++){ float scale=4.f/framesize; float *fft=work[i]; float *mdct=pcm[i]; float *logmdct=mdct+framesize/2; analysis("pre",frameno+i,pcm[i],framesize,0,0); /* fft and mdct transforms */ for(j=0;j<framesize;j++) fft[j]=pcm[i][j]*=window[j]; drft_forward(&f_look,fft); local_ampmax[i]=-9999.f; fft[0]*=scale; fft[0]=todB(fft); for(j=1;j<framesize-1;j+=2){ float temp=scale*FAST_HYPOT(fft[j],fft[j+1]); temp=fft[(j+1)>>1]=todB(&temp); if(temp>local_ampmax[i])local_ampmax[i]=temp; } if(local_ampmax[i]>ampmax)ampmax=local_ampmax[i]; mdct_forward(&m_look,pcm[i],mdct); for(j=0;j<framesize/2;j++) logmdct[j]=todB(mdct+j); analysis("mdct",frameno+i,logmdct,framesize/2,1,0); analysis("fft",frameno+i,fft,framesize/2,1,0); } for(i=0;i<2;i++){ float amp; float *fft=work[i]; float *logmax=fft; float *mdct=pcm[i]; float *logmdct=mdct+framesize/2; float *mask=fft+framesize/2; /* floor psychoacoustics */ _vp_compute_mask(&p_look, pg_look, i, fft, logmdct, mask, ampmax, local_ampmax[i], framesize/2); analysis("mask",frameno+i,mask,framesize/2,1,0); { vorbis_block vb; vorbis_dsp_state vd; memset(&vd,0,sizeof(vd)); vd.vi=&vi; vb.vd=&vd; vb.pcmend=framesize; /* floor quantization/application */ nonzero[i]=_floor_P[1]->forward(&vb,floor_look, mdct, logmdct, mask, logmax, flr[i]); } _vp_remove_floor(&p_look, pg_look, logmdct, mdct, flr[i], pcm[i], local_ampmax[i]); for(j=0;j<framesize/2;j++) if(fabs(pcm[i][j])>1500) fprintf(stderr,"%ld ",frameno+i); analysis("res",frameno+i,pcm[i],framesize/2,1,0); analysis("codedflr",frameno+i,flr[i],framesize/2,1,1); } /* residue prequantization */ _vp_partition_prequant(&p_look, &vi, pcm, nonzero); for(i=0;i<2;i++) analysis("quant",frameno+i,pcm[i],framesize/2,1,0); /* channel coupling / stereo quantization */ _vp_couple(&p_look, &mapping_info, pcm, nonzero); for(i=0;i<2;i++) analysis("coupled",frameno+i,pcm[i],framesize/2,1,0); /* decoupling */ for(i=mapping_info.coupling_steps-1;i>=0;i--){ float *pcmM=pcm[mapping_info.coupling_mag[i]]; float *pcmA=pcm[mapping_info.coupling_ang[i]]; for(j=0;j<framesize/2;j++){ float mag=pcmM[j]; float ang=pcmA[j]; if(mag>0) if(ang>0){ pcmM[j]=mag; pcmA[j]=mag-ang; }else{ pcmA[j]=mag; pcmM[j]=mag+ang; } else if(ang>0){ pcmM[j]=mag; pcmA[j]=mag+ang; }else{ pcmA[j]=mag; pcmM[j]=mag-ang; } } } for(i=0;i<2;i++) analysis("decoupled",frameno+i,pcm[i],framesize/2,1,0); for(i=0;i<2;i++){ float amp; for(j=0;j<framesize/2;j++) pcm[i][j]*=flr[i][j]; analysis("final",frameno+i,pcm[i],framesize/2,1,1); /* take it back to time */ mdct_backward(&m_look,pcm[i],pcm[i]); for(j=0;j<framesize/2;j++) out[i][j]+=pcm[i][j]*window[j]; analysis("out",frameno+i,out[i],framesize/2,0,0); } /* write data. Use the part of buffer we're about to shift out */ for(i=0;i<2;i++){ char *ptr=buffer+i*2; float *mono=out[i]; int flag=0; for(j=0;j<framesize/2;j++){ int val=mono[j]*32767.; /* might as well guard against clipping */ if(val>32767){ if(!flag)fprintf(stderr,"clipping in frame %ld ",frameno+i); flag=1; val=32767; } if(val<-32768){ if(!flag)fprintf(stderr,"clipping in frame %ld ",frameno+i); flag=1; val=-32768; } ptr[0]=val&0xff; ptr[1]=(val>>8)&0xff; ptr+=4; } } fprintf(stderr,"*"); fwrite(buffer,1,framesize*2,stdout); memmove(buffer,buffer2,framesize*2); for(i=0;i<2;i++){ for(j=0,k=framesize/2;j<framesize/2;j++,k++) out[i][j]=pcm[i][k]*window[k]; } frameno+=2; }else