/**
 * Transform one frame of samples and derive its power profile.
 *
 * The frame length is n = 2^(cnw->logN). The frame is handed to
 * fft_forward() and the result to CalcPowerSqroot(). When the build defines
 * NOISE_ACCUM, each of the PROFILE_SIZE power values is added into
 * cnw->noiseAccum[] via AddNPD_P1() and cnw->noiseCnt is incremented.
 *
 * @param src        Input samples passed to fft_forward().
 * @param startPos   Not used by this function.
 * @param srcBufMask Not used by this function.
 * @param cnw        Work area: logN is read; with NOISE_ACCUM, noiseAccum[]
 *                   and noiseCnt are updated.
 * @return The frame length n. With NOISE_ACCUM, -1 instead once noiseCnt has
 *         reached REP_FFT_CNT.
 */
int AverageFFT(short* src,int startPos,int srcBufMask,CleanNoiseWork* cnw)
{
    const int logN = cnw->logN;
    const int n = (1 << logN);
    cmplx vect[n];            /* passed to fft_forward(), then to CalcPowerSqroot() */
    npd power[PROFILE_SIZE];
    npd_p1 powerSum;          /* passed by address to CalcPowerSqroot(); not read here */
#ifdef NOISE_ACCUM
    int i;
#endif

    /* Kept in the signature for callers; nothing below depends on them. */
    (void)startPos;
    (void)srcBufMask;

    fft_forward(vect, src, logN);
    CalcPowerSqroot(&power[0], &powerSum, vect, logN);

#ifdef NOISE_ACCUM
    for (i = 0; i < PROFILE_SIZE; i++)
        AddNPD_P1(&cnw->noiseAccum[i], &power[i]);
    cnw->noiseCnt++;
    if (cnw->noiseCnt >= REP_FFT_CNT)
        return -1;
#endif

    return n;
}
void AutoTalent::init(unsigned long sr) { unsigned long ti; fs = sr; aref = 440; if (fs >=88200) { cbsize = 4096; } else { cbsize = 2048; } corrsize = cbsize / 2 + 1; pmax = 1/(float)70; // max and min periods (ms) pmin = 1/(float)700; // eventually may want to bring these out as sliders pperiod = pmax; nmax = (unsigned long)(fs * pmax); if (nmax > corrsize) { nmax = corrsize; } nmin = (unsigned long)(fs * pmin); cbi = (float*) calloc(cbsize, sizeof(float)); cbo = (float*) calloc(cbsize, sizeof(float)); cbonorm = (float*) calloc(cbsize, sizeof(float)); cbiwr = 0; cbord = 0; // Standard raised cosine window, max height at N/2 hannwindow = (float*) calloc(cbsize, sizeof(float)); for (ti=0; ti<cbsize; ti++) { hannwindow[ti] = -0.5*cos(2*PI*ti/(cbsize - 1)) + 0.5; } // Generate a window with a single raised cosine from N/4 to 3N/4 cbwindow = (float*) calloc(cbsize, sizeof(float)); for (ti=0; ti<(cbsize / 2); ti++) { cbwindow[ti+cbsize/4] = -0.5*cos(4*PI*ti/(cbsize - 1)) + 0.5; } noverlap = 4; fmembvars = fft_con(cbsize); ffttime = (float*) calloc(cbsize, sizeof(float)); fftfreqre = (float*) calloc(corrsize, sizeof(float)); fftfreqim = (float*) calloc(corrsize, sizeof(float)); // ---- Calculate autocorrelation of window ---- acwinv = (float*) calloc(cbsize, sizeof(float)); for (ti=0; ti<cbsize; ti++) { ffttime[ti] = cbwindow[ti]; } fft_forward(fmembvars, cbwindow, fftfreqre, fftfreqim); for (ti=0; ti<corrsize; ti++) { fftfreqre[ti] = (fftfreqre[ti])*(fftfreqre[ti]) + (fftfreqim[ti])*(fftfreqim[ti]); fftfreqim[ti] = 0; } fft_inverse(fmembvars, fftfreqre, fftfreqim, ffttime); for (ti=1; ti<cbsize; ti++) { acwinv[ti] = ffttime[ti]/ffttime[0]; if (acwinv[ti] > 0.000001) { acwinv[ti] = (float)1/acwinv[ti]; } else { acwinv[ti] = 0; } } acwinv[0] = 1; // ---- END Calculate autocorrelation of window ---- lrshift = 0; ptarget = 0; sptarget = 0; wasvoiced = 0; persistamt = 0; glidepersist = 100; // 100 ms glide persist vthresh = 0.8; // The voiced confidence (unbiased peak) 
threshold level // Pitch shifter initialization phprdd = 0.01; // Default period phprd = phprdd; phinc = (float)1/(phprd * fs); phincfact = 1; phasein = 0; phaseout = 0; frag = (float*) calloc(cbsize, sizeof(float)); fragsize = 0; }
/** This is the main loop where we'll process our samples */ void AutoTalent::ProcessDoubleReplacing(double** inputs, double** outputs, int nFrames) { // Mutex is already locked for us. double* in1 = inputs[0]; double* out1 = outputs[0]; double* out2 = outputs[1]; // copy struct variables to local /* float fMix = fMix; float fShift = fShift; float fTune = fTune; float fA = fA; float fBb = fBb; float fB = fB; float fC = fC; float fDb = fDb; float fD = fD; float fEb = fEb; float fE = fE; float fF = fF; float fGb = fGb; float fG = fG; float fAb = fAb; float fGlide = fGlide; float fAmount = fAmount; */ float fPersist = glidepersist; aref = (float)440*pow(2,fTune/12); unsigned long N = cbsize; unsigned long Nf = corrsize; //unsigned long fs = fs; /* float pmax = pmax; float pmin = pmin; unsigned long nmax = nmax; unsigned long nmin = nmin; float pperiod = pperiod; float pitch = pitch; float conf = conf; float aref = aref; */ // long int ti; long int ti2; long int ti3; float tf; float tf2; float tf3; //double samplesPerBeat = GetSamplesPerBeat(); //double samplePos = (double) GetSamplePos(); for (int s = 0; s < nFrames; ++s, ++in1, ++out1, ++out2) { // load data into circular buffer tf = (float) *in1; cbi[cbiwr] = tf; cbiwr++; if (cbiwr >= N) { cbiwr = 0; } // ******************** // * Low-rate section * // ******************** // Every N/noverlap samples, run pitch estimation / correction code if ((cbiwr)%(N/noverlap) == 0) { // ---- Obtain autocovariance ---- // Window and fill FFT buffer ti2 = (long) cbiwr; for (ti=0; ti<(long)N; ti++) { ffttime[ti] = (float)(cbi[(ti2-ti)%N]*cbwindow[ti]); } // Calculate FFT fft_forward(fmembvars, ffttime, fftfreqre, fftfreqim); // Remove DC fftfreqre[0] = 0; fftfreqim[0] = 0; // Take magnitude squared for (ti=1; ti< (long) Nf; ti++) { fftfreqre[ti] = (fftfreqre[ti])*(fftfreqre[ti]) + (fftfreqim[ti])*(fftfreqim[ti]); fftfreqim[ti] = 0; } // Calculate IFFT fft_inverse(fmembvars, fftfreqre, fftfreqim, ffttime); // Normalize for (ti=1; 
ti<(long)N; ti++) { ffttime[ti] = ffttime[ti] / ffttime[0]; } ffttime[0] = 1; // ---- END Obtain autocovariance ---- // ---- Calculate pitch and confidence ---- // Calculate pitch period // Pitch period is determined by the location of the max (biased) // peak within a given range // Confidence is determined by the corresponding unbiased height tf2 = 0; pperiod = pmin; for (ti=nmin; ti<(long)nmax; ti++) { ti2 = ti-1; ti3 = ti+1; if (ti2<0) { ti2 = 0; } if (ti3>(long)Nf) { ti3 = Nf; } tf = ffttime[ti]; if (tf>ffttime[ti2] && tf>=ffttime[ti3] && tf>tf2) { tf2 = tf; conf = tf*acwinv[ti]; pperiod = (float)ti/fs; } } // Convert to semitones pitch = (float) -12*log10((float)aref*pperiod)*L2SC; pitch = pitch; pperiod = pperiod; conf = conf; // ---- END Calculate pitch and confidence ---- // ---- Determine pitch target ---- // If voiced if (conf>=vthresh) { // TODO: Scale sliders // Determine pitch target tf = -1; tf2 = 0; tf3 = 0; for (ti=0; ti<12; ti++) { switch (ti) { case 0: tf2 = fNotes[9]; break; case 1: tf2 = fNotes[10]; break; case 2: tf2 = fNotes[11]; break; case 3: tf2 = fNotes[0]; break; case 4: tf2 = fNotes[1]; break; case 5: tf2 = fNotes[2]; break; case 6: tf2 = fNotes[3]; break; case 7: tf2 = fNotes[4]; break; case 8: tf2 = fNotes[5]; break; case 9: tf2 = fNotes[6]; break; case 10: tf2 = fNotes[7]; break; case 11: tf2 = fNotes[8]; break; } /* if (ti==ptarget) { */ /* tf2 = tf2 + 0.01; // add a little hysteresis */ /* } */ tf2 = tf2 - (float)fabs( (pitch-(float)ti)/6 - 2*floorf(((pitch-(float)ti)/12 + 0.5)) ); // like a likelihood function if (tf2>=tf) { // that we're maximizing tf3 = (float)ti; // to find the target pitch tf = tf2; } } ptarget = tf3; // Glide persist if (wasvoiced == 0) { wasvoiced = 1; tf = persistamt; sptarget = (1-tf)*ptarget + tf*sptarget; persistamt = 1; } // Glide on circular scale tf3 = (float)ptarget - sptarget; tf3 = tf3 - (float)12*floorf(tf3/12 + 0.5); if (fGlide>0) { tf2 = (float)1-pow((float)1/24, (float)N * 1000/ 
(noverlap*fs*fGlide)); } else { tf2 = 1; } sptarget = sptarget + tf3*tf2; } // If not voiced else { wasvoiced = 0; // Keep track of persist amount if (fPersist>0) { tf = pow((float)1/2, (float)N * 1000/ (noverlap*fs*fPersist)); } else { tf = 0; } persistamt = persistamt * tf; // Persist amount decays exponentially } // END If voiced // ---- END Determine pitch target ---- // ---- Determine correction to feed to the pitch shifter ---- tf = sptarget - pitch; // Correction amount tf = tf - (float)12*floorf(tf/12 + 0.5); // Never do more than +- 6 semitones of correction if (conf<vthresh) { tf = 0; } lrshift = fShift + fAmount*tf; // Add in pitch shift slider // ---- Compute variables for pitch shifter that depend on pitch --- phincfact = (float)pow(2, lrshift/12); if (conf>=vthresh) { // Keep old period when unvoiced phinc = (float)1/(pperiod*fs); phprd = pperiod*2; } } // ************************ // * END Low-Rate Section * // ************************ // ***************** // * Pitch Shifter * // ***************** // TODO: Pre-filter with some kind of filter (maybe cheby2 or just svf) // TODO: Use cubic spline interpolation // IMPROVE QUALITY OF PITCH SHIFTER! // what is the glitch at "lAaAack"? probably pitch shifter // Better snippet management // Pre-filter // Cubic spline interp // Pitch shifter (overlap-add, pitch synchronous) // Note: pitch estimate is naturally N/2 samples old phasein = phasein + phinc; phaseout = phaseout + phinc*phincfact; // If it happens that there are no snippets placed at the output, grab a new snippet! 
/* if (cbonorm[((long int)cbord + (long int)(N/2*(1 - (float)1 / phincfact)))%N] < 0.2) { */ /* fprintf(stderr, "help!"); */ /* phasein = 1; */ /* phaseout = 1; */ /* } */ // When input phase resets, take a snippet from N/2 samples in the past if (phasein >= 1) { phasein = phasein - 1; ti2 = cbiwr - (long int)N/2; for (ti=-((long int)N)/2; ti<(long int)N/2; ti++) { frag[ti%N] = cbi[(ti + ti2)%N]; } } // When output phase resets, put a snippet N/2 samples in the future if (phaseout >= 1) { fragsize = fragsize*2; if (fragsize >= N) { fragsize = N; } phaseout = phaseout - 1; ti2 = cbord + N/2; ti3 = (long int)(((float)fragsize) / phincfact); for (ti=-ti3/2; ti<(ti3/2); ti++) { tf = hannwindow[(long int)N/2 + ti*(long int)N/ti3]; cbo[(ti + ti2)%N] = cbo[(ti + ti2)%N] + frag[((int)(phincfact*ti))%N]*tf; cbonorm[(ti + ti2)%N] = cbonorm[(ti + ti2)%N] + tf; } fragsize = 0; } fragsize++; // Get output signal from buffer tf = cbonorm[cbord]; // Normalize if (tf>0.5) { tf = (float)1/tf; } else { tf = 1; } tf = tf*cbo[cbord]; // read buffer tf = cbo[cbord]; cbo[cbord] = 0; // erase for next cycle cbonorm[cbord] = 0; cbord++; // increment read pointer if (cbord >= N) { cbord = 0; } // ********************* // * END Pitch Shifter * // ********************* // Write audio to output of plugin // Mix (blend between original (delayed) =0 and shifted/corrected =1) *out1 = *out2 = (double) fMix*tf + (1-fMix)*cbi[(cbiwr - N + 1)%N]; } }
/*!
  Determine the endianness of the audio data a CD drive returns, based on
  reading data from it.

  Some drives return audio data big-endian while some (most) return it
  little-endian. Drives known to return data big-endian are SCSI drives from
  Kodak, Ricoh, HP, Philips, Plasmon, Grundig CDR100IPW, and Mitsumi CD-R.
  ATAPI and MMC drives are little-endian.

  rocky: As someone who didn't write the code, I have to say this is
  nothing less than brilliant. An FFT is done both ways and the
  transform is looked at to see which has data in the FFT (or audible)
  portion. (Or so that's how I understand it.)

  What the code does: for each audio track it looks for a sector that
  contains non-zero samples, splits 256 interleaved samples into two
  128-sample channels, and runs fft_forward() on them twice - once with the
  samples decoded via le16_to_cpu() and once via be16_to_cpu(). The summed
  absolute values of each pass are compared; the interpretation with the
  smaller sum receives a vote weighted by the ratio of the two sums. Voting
  stops early once five tracks have voted unanimously.

  d->bigendianp is forced to -1 while reading and restored on every exit
  path after that point.

  @param d  drive to probe.

  @return 1 if big-endian, 0 if little-endian, -1 if a buffer could not be
  allocated or a read failed. If the votes are inconclusive the result of
  bigendianp() is returned.
*/
int data_bigendianp(cdrom_drive_t *d)
{
  float lsb_votes=0;
  float msb_votes=0;
  int i,checked;
  int endiancache=d->bigendianp;
  const long readsectors=5;

  float *a=(float *)calloc(1024,sizeof(float));
  float *b=(float *)calloc(1024,sizeof(float));
  /* Zero-initialised, sized exactly as before (one int16_t per raw byte). */
  int16_t *buff=(int16_t *)calloc(readsectors*CDIO_CD_FRAMESIZE_RAW,
                                  sizeof(int16_t));

  /* Bail out before touching the drive if any buffer is missing. */
  if(a==NULL || b==NULL || buff==NULL){
    free(a);
    free(b);
    free(buff);
    return(-1);
  }

  /* look at the starts of the audio tracks */
  /* if real silence, move in until some static is found */

  /* Force no swap for now */
  d->bigendianp=-1;

  cdmessage(d,"\nAttempting to determine drive endianness from data...");
  d->enable_cdda(d,1);

  for(i=0,checked=0;i<d->tracks;i++){
    float lsb_energy=0;
    float msb_energy=0;

    if(cdda_track_audiop(d,i+1)==1){
      long firstsector=cdda_track_firstsector(d,i+1);
      long lastsector=cdda_track_lastsector(d,i+1);
      int zeroflag=-1;   /* becomes 0 once non-zero data has been seen */
      long beginsec=0;

      /* find a block with nonzero data */
      while(firstsector+readsectors<=lastsector){
        int j;

        if(d->read_audio(d,buff,firstsector,readsectors)>0){

          /* Avoid scanning through jitter at the edges */
          for(beginsec=0;beginsec<readsectors;beginsec++){
            int offset=beginsec*CDIO_CD_FRAMESIZE_RAW/2;
            /* Search *half* */
            for(j=460;j<128+460;j++)
              if(buff[offset+j]!=0){
                zeroflag=0;
                break;
              }
            if(!zeroflag)break;
          }
          if(!zeroflag)break;
          firstsector+=readsectors;
        }else{
          /* Read failure: undo the temporary state and give up. */
          d->enable_cdda(d,0);
          d->bigendianp=endiancache;
          free(a);
          free(b);
          free(buff);
          return(-1);
        }
      }

      /* convert the sector index into an int16_t offset into buff */
      beginsec*=CDIO_CD_FRAMESIZE_RAW/2;

      /* un-interleave for an FFT */
      if(!zeroflag){
        int j;

        /* little-endian interpretation */
        for(j=0;j<128;j++)
          a[j] = le16_to_cpu(buff[j*2+beginsec+460]);
        for(j=0;j<128;j++)
          b[j] = le16_to_cpu(buff[j*2+beginsec+461]);

        fft_forward(128,a,NULL,NULL);
        fft_forward(128,b,NULL,NULL);

        for(j=0;j<128;j++)
          lsb_energy+=fabs(a[j])+fabs(b[j]);

        /* big-endian interpretation */
        for(j=0;j<128;j++)
          a[j] = be16_to_cpu(buff[j*2+beginsec+460]);
        for(j=0;j<128;j++)
          b[j] = be16_to_cpu(buff[j*2+beginsec+461]);

        fft_forward(128,a,NULL,NULL);
        fft_forward(128,b,NULL,NULL);

        for(j=0;j<128;j++)
          msb_energy+=fabs(a[j])+fabs(b[j]);
      }
    }

    /* The interpretation with the smaller sum wins this track's vote;
       equal sums (including a track with no data) cast no vote. */
    if(lsb_energy<msb_energy){
      lsb_votes+=msb_energy/lsb_energy;
      checked++;
    }else if(lsb_energy>msb_energy){
      msb_votes+=lsb_energy/msb_energy;
      checked++;
    }

    /* Five unanimous votes are enough. */
    if(checked==5 && (lsb_votes==0 || msb_votes==0))break;
    cdmessage(d,".");
  }

  free(buff);
  free(a);
  free(b);
  d->bigendianp=endiancache;
  d->enable_cdda(d,0);

  /* How did we vote?  Be potentially noisy */
  if (lsb_votes>msb_votes) {
    char buffer[256];
    cdmessage(d,"\n\tData appears to be coming back Little Endian.\n");
    snprintf(buffer,sizeof(buffer),"\tcertainty: %d%%\n",
             (int) (100.*lsb_votes/(lsb_votes+msb_votes)+.5));
    cdmessage(d,buffer);
    return(0);
  } else {
    if(msb_votes>lsb_votes){
      char buffer[256];
      cdmessage(d,"\n\tData appears to be coming back Big Endian.\n");
      snprintf(buffer,sizeof(buffer),"\tcertainty: %d%%\n",
               (int) (100.*msb_votes/(lsb_votes+msb_votes)+.5));
      cdmessage(d,buffer);
      return(1);
    }

    cdmessage(d,"\n\tCannot determine CDROM drive endianness.\n");
    return(bigendianp());
  }
}
//*************************// // Perform Routine PD// //*************************// t_int *autotune_perform(t_int *w) { t_autotune *x = (t_autotune *)(w[1]); // object is first arg t_float *in = (t_float *)(w[2]); t_float *out = (t_float *)(w[3]); unsigned long SampleCount = (unsigned long)(w[4]); // copy struct variables to local /*float fA = x->fA; float fBb = x->fBb; float fB = x->fB; float fC = x->fC; float fDb = x->fDb; float fD = x->fD; float fEb = x->fEb; float fE = x->fE; float fF = x->fF; float fGb = x->fGb; float fG = x->fG; float fAb = x->fAb;*/ //float fGlide = x->fGlide; //float fPersist = x->glidepersist; int iNotes[12]; int iPitch2Note[12]; int iNote2Pitch[12]; int numNotes; float fAmount = x->fAmount; float fSmooth = x->fSmooth * 0.8; float fTune = x->fTune; iNotes[0] = (int) x->fA; iNotes[1] = (int) x->fBb; iNotes[2] = (int) x->fB; iNotes[3] = (int) x->fC; iNotes[4] = (int) x->fDb; iNotes[5] = (int) x->fD; iNotes[6] = (int) x->fEb; iNotes[7] = (int) x->fE; iNotes[8] = (int) x->fF; iNotes[9] = (int) x->fGb; iNotes[10] = (int) x->fG; iNotes[11] = (int) x->fAb; float fFixed = x->fFixed; float fPull = x->fPull; float fShift = x->fShift; int iScwarp = x->fScwarp; float fLfoamp = x->fLfoamp; float fLforate = x->fLforate; float fLfoshape = x->fLfoshape; float fLfosymm = x->fLfosymm; int iLfoquant = x->fLfoquant; int iFcorr = x->fFcorr; float fFwarp = x->fFwarp; float fMix = x->fMix; //x->aref = (float)440*pow(2,fTune/12); unsigned long int lSampleIndex; unsigned long N = x->cbsize; unsigned long Nf = x->corrsize; unsigned long fs = x->fs; float pmax = x->pmax; float pmin = x->pmin; unsigned long nmax = x->nmax; unsigned long nmin = x->nmin; //float pperiod = x->pperiod; //float pitch = x->pitch; // volatile long int ti; volatile long int ti2; volatile long int ti3; volatile long int ti4; volatile float tf; volatile float tf2; volatile float tf3; // Variables for cubic spline interpolator volatile float indd; volatile int ind0; volatile int ind1; volatile 
int ind2; volatile int ind3; volatile float vald; volatile float val0; volatile float val1; volatile float val2; volatile float val3; volatile int lowersnap; volatile int uppersnap; volatile float lfoval; volatile float pperiod; volatile float inpitch; volatile float conf; volatile float outpitch; volatile float aref; volatile float fa; volatile float fb; volatile float fc; volatile float fk; volatile float flamb; volatile float frlamb; volatile float falph; volatile float foma; volatile float f1resp; volatile float f0resp; volatile float flpa; volatile int ford; // Some logic for the semitone->scale and scale->semitone conversion // If no notes are selected as being in the scale, instead snap to all notes ti2 = 0; for (ti=0; ti<12; ti++) { if (iNotes[ti]>=0) { iPitch2Note[ti] = ti2; iNote2Pitch[ti2] = ti; ti2 = ti2 + 1; } else { iPitch2Note[ti] = -1; } } numNotes = ti2; while (ti2<12) { iNote2Pitch[ti2] = -1; ti2 = ti2 + 1; } if (numNotes==0) { for (ti=0; ti<12; ti++) { iNotes[ti] = 1; iPitch2Note[ti] = ti; iNote2Pitch[ti] = ti; } numNotes = 12; } iScwarp = (iScwarp + numNotes*5)%numNotes; ford = x->ford; falph = x->falph; foma = (float)1 - falph; flpa = x->flpa; flamb = x->flamb; tf = pow((float)2,fFwarp/2)*(1+flamb)/(1-flamb); frlamb = (tf - 1)/(tf + 1); x->aref = (float)fTune; N = x->cbsize; Nf = x->corrsize; fs = x->fs; pmax = x->pmax; pmin = x->pmin; nmax = x->nmax; nmin = x->nmin; aref = x->aref; pperiod = x->pmax; inpitch = x->inpitch; conf = x->conf; outpitch = x->outpitch; //******************// // MAIN DSP LOOP // //******************// for (lSampleIndex = 0; lSampleIndex < SampleCount; lSampleIndex++) { // load data into circular buffer tf = (float) *(in++); ti4 = x->cbiwr; //fprintf(stderr,"ti4=%d N=%d\n", ti4, N); x->cbi[ti4] = tf; /*x->cbiwr++; if (x->cbiwr >= N) { x->cbiwr = 0; }*/ if (iFcorr>=1) { // Somewhat experimental formant corrector // formants are removed using an adaptive pre-filter and // re-introduced after pitch manipulation using 
post-filter // tf is signal input fa = tf - x->fhp; // highpass pre-emphasis filter x->fhp = tf; fb = fa; for (ti=0; ti<(long)ford; ti++) { x->fsig[ti] = fa*fa*foma + x->fsig[ti]*falph; fc = (fb-x->fc[ti])*flamb + x->fb[ti]; x->fc[ti] = fc; x->fb[ti] = fb; fk = fa*fc*foma + x->fk[ti]*falph; x->fk[ti] = fk; tf = fk/(x->fsig[ti] + 0.000001); tf = tf*foma + x->fsmooth[ti]*falph; x->fsmooth[ti] = tf; x->fbuff[ti][ti4] = tf; fb = fc - tf*fa; fa = fa - tf*fc; } x->cbf[ti4] = fa; // Now hopefully the formants are reduced // More formant correction code at the end of the DSP loop } else { x->cbf[ti4] = tf; } //fprintf(stderr,"x->cbf[ti4]=%f\n", x->cbf[ti4]); // Input write pointer logic x->cbiwr++; if (x->cbiwr >= N) { x->cbiwr = 0; } // ********************// // * Low-rate section *// // ********************// //fprintf(stderr,"overlap=%d outpitch=%f inpitch=%f\n", (x->cbiwr)%(N/x->noverlap), outpitch, inpitch); //fprintf(stderr,"outpitch=%f inpitch=%f\n", outpitch, inpitch); // Every N/noverlap samples, run pitch estimation / correction code if ((x->cbiwr)%(N/x->noverlap) == 0) { //fprintf(stderr,"ti4=%d N=%d\n", ti4, N); // ---- Obtain autocovariance ---- // // Window and fill FFT buffer ti2 = (long) x->cbiwr; for (ti=0; ti<(long)N; ti++) { x->ffttime[ti] = (float)(x->cbi[(ti2-ti)%N]*x->cbwindow[ti]); } // Calculate FFT fft_forward(x->fx, x->ffttime, x->fftfreqre, x->fftfreqim); // Remove DC x->fftfreqre[0] = 0; x->fftfreqim[0] = 0; // Take magnitude squared for (ti=1; ti< (long) Nf; ti++) { x->fftfreqre[ti] = (x->fftfreqre[ti])*(x->fftfreqre[ti]) + (x->fftfreqim[ti])*(x->fftfreqim[ti]); x->fftfreqim[ti] = 0; } // Calculate IFFT fft_inverse(x->fx, x->fftfreqre, x->fftfreqim, x->ffttime); // Normalize for (ti=1; ti<(long)N; ti++) { x->ffttime[ti] = x->ffttime[ti] / x->ffttime[0]; } x->ffttime[0] = 1; // ---- END Obtain autocovariance ---- // ---- Calculate pitch and confidence ---- // Calculate pitch period // Pitch period is determined by the location of the max 
(biased) // peak within a given range // Confidence is determined by the corresponding unbiased height tf2 = 0; pperiod = pmin; for (ti=nmin; ti<(long)nmax; ti++) { ti2 = ti-1; ti3 = ti+1; if (ti2<0) { ti2 = 0; } if (ti3>(long)Nf) { ti3 = Nf; } tf = x->ffttime[ti]; if (tf>x->ffttime[ti2] && tf>=x->ffttime[ti3] && tf>tf2) { tf2 = tf; ti4 = ti; //conf = tf*x->acwinv[ti]; //pperiod = (float)ti/fs; } } if (tf2>0) { conf = tf2*x->acwinv[ti4]; if (ti4>0 && ti4<(long)Nf) { // Find the center of mass in the vicinity of the detected peak tf = x->ffttime[ti4-1]*(ti4-1); tf = tf + x->ffttime[ti4]*(ti4); tf = tf + x->ffttime[ti4+1]*(ti4+1); tf = tf/(x->ffttime[ti4-1] + x->ffttime[ti4] + x->ffttime[ti4+1]); pperiod = tf/fs; } else { pperiod = (float)ti4/fs; } } // Convert to semitones tf = (float) -12*log10((float)aref*pperiod)*L2SC; //fprintf(stderr,"tf=%f aref=%f pperiod=%f\n", tf, aref, pperiod); //post("pperiod=%f conf=%f\n", pperiod, conf); float pp_test = x->pperiod/(x->pperiod - pperiod); if (pp_test < 0.5 || pp_test > 2) pp_test = 1; else pp_test = 0; if (conf>=x->vthresh && tf == tf) { // second check is for NANs inpitch = tf; x->inpitch = tf; // update pitch only if voiced x->pperiod = pperiod; } x->conf = conf; x->fPitch = inpitch; x->fConf = conf; //x->pitch = pitch; //x->pperiod = pperiod; //x->conf = conf; // ---- END Calculate pitch and confidence ---- /* // ---- Determine pitch target ---- // If voiced if (conf>=x->vthresh) { // TODO: Scale sliders // Determine pitch target tf = -1; tf2 = 0; tf3 = 0; for (ti=0; ti<12; ti++) { switch (ti) { case 0: tf2 = fA; break; case 1: tf2 = fBb; break; case 2: tf2 = fB; break; case 3: tf2 = fC; break; case 4: tf2 = fDb; break; case 5: tf2 = fD; break; case 6: tf2 = fEb; break; case 7: tf2 = fE; break; case 8: tf2 = fF; break; case 9: tf2 = fGb; break; case 10: tf2 = fG; break; case 11: tf2 = fAb; break; } // if (ti==x->ptarget) { // tf2 = tf2 + 0.01; // add a little hysteresis // } tf2 = tf2 - (float)fabs( 
(pitch-(float)ti)/6 - 2*floorf(((pitch-(float)ti)/12 + 0.5)) ); // like a likelihood function if (tf2>=tf) { // that we're maximizing tf3 = (float)ti; // to find the target pitch tf = tf2; } } x->ptarget = tf3; // Glide persist if (x->wasvoiced == 0) { x->wasvoiced = 1; tf = x->persistamt; x->sptarget = (1-tf)*x->ptarget + tf*x->sptarget; x->persistamt = 1; } // Glide on circular scale tf3 = (float)x->ptarget - x->sptarget; tf3 = tf3 - (float)12*floorf(tf3/12 + 0.5); if (fGlide>0) { tf2 = (float)1-pow((float)1/24, (float)N * 1000/ (x->noverlap*fs*fGlide)); } else { tf2 = 1; } x->sptarget = x->sptarget + tf3*tf2; } // If not voiced else { x->wasvoiced = 0; // Keep track of persist amount if (fPersist>0) { tf = pow((float)1/2, (float)N * 1000/ (x->noverlap*fs*fPersist)); } else { tf = 0; } x->persistamt = x->persistamt * tf; // Persist amount decays exponentially } // END If voiced // ---- END Determine pitch target ---- // ---- Determine correction to feed to the pitch shifter ---- tf = x->sptarget - pitch; // Correction amount tf = tf - (float)12*floorf(tf/12 + 0.5); // Never do more than +- 6 semitones of correction if (conf<x->vthresh) { tf = 0; } x->lrshift = fShift + fAmount*tf; // Add in pitch shift slider // ---- Compute variables for pitch shifter that depend on pitch --- x->phincfact = (float)pow(2, x->lrshift/12); if (conf>=x->vthresh) { // Keep old period when unvoiced x->inphinc = (float)1/(pperiod*fs); x->phprd = pperiod*2; } } // ************************ // * END Low-Rate Section * // ************************ */ //fprintf(stderr,"%f %f %f %f", inpitch, outpitch, pperiod, ti4); // ---- Modify pitch in all kinds of ways! 
---- outpitch = inpitch; //fprintf(stderr,"outpitch=%f\n", outpitch); // Pull to fixed pitch // when fPull is 1 (legacy behavior which picks absolute pitch in respect to A intonation) if (fPull <= 1) { outpitch = (1-fPull)*outpitch + fPull*fFixed; } else { // Special pull case when fPull is 2 /*if (fFixed < 0) while (fFixed < 0) fFixed += 12; else if (fFixed > 12) while (fFixed > 12) fFixed -= 12;*/ float inpitch_norm = inpitch; if (inpitch_norm < 6) while (inpitch_norm < 6) inpitch_norm += 12; else if (inpitch_norm > 6) while (inpitch_norm > 6) inpitch_norm -= 12; /*float a = fFixed - inpitch_norm; float b = fFixed - 12 - inpitch_norm; float c = fFixed + 12 - inpitch_norm; float result = a; if (abs(b) < abs(result)) result = b; if (abs(c) < abs(result)) result = c; outpitch = inpitch + result;*/ float a = inpitch - inpitch_norm; float b = inpitch - 12 - inpitch_norm; float c = inpitch + 12 - inpitch_norm; //post("a=%f b=%f c=%f in_norm=%f\n", a, b, c, inpitch_norm); float result = a; if (abs(b) < abs(result)) result = b; if (abs(c) < abs(result)) result = c; outpitch = result + fFixed; //fprintf(stderr,"outpitch=%f inpitch=%f in_norm=%f\n", outpitch, inpitch, inpitch_norm); } // -- Convert from semitones to scale notes -- ti = (int)(outpitch/12 + 32) - 32; // octave tf = outpitch - ti*12; // semitone in octave ti2 = (int)tf; ti3 = ti2 + 1; // a little bit of pitch correction logic, since it's a convenient place for it if (iNotes[ti2%12]<0 || iNotes[ti3%12]<0) { // if between 2 notes that are more than a semitone apart lowersnap = 1; uppersnap = 1; } else { lowersnap = 0; uppersnap = 0; if (iNotes[ti2%12]==1) { // if specified by user lowersnap = 1; } if (iNotes[ti3%12]==1) { // if specified by user uppersnap = 1; } } // (back to the semitone->scale conversion) // finding next lower pitch in scale while (iNotes[(ti2+12)%12]<0) { ti2 = ti2 - 1; } // finding next higher pitch in scale while (iNotes[ti3%12]<0) { ti3 = ti3 + 1; } tf = (tf-ti2)/(ti3-ti2) + 
iPitch2Note[(ti2+12)%12]; if (ti2<0) { tf = tf - numNotes; } outpitch = tf + numNotes*ti; // -- Done converting to scale notes -- // The actual pitch correction ti = (int)(outpitch+128) - 128; tf = outpitch - ti - 0.5; ti2 = ti3-ti2; if (ti2>2) { // if more than 2 semitones apart, put a 2-semitone-like transition halfway between tf2 = (float)ti2/2; } else { tf2 = (float)1; } if (fSmooth<0.001) { tf2 = tf*tf2/0.001; } else { tf2 = tf*tf2/fSmooth; } if (tf2<-0.5) tf2 = -0.5; if (tf2>0.5) tf2 = 0.5; tf2 = 0.5*sin(PI*tf2) + 0.5; // jumping between notes using horizontally-scaled sine segment tf2 = tf2 + ti; if ( (tf<0.5 && lowersnap) || (tf>=0.5 && uppersnap) ) { outpitch = fAmount*tf2 + ((float)1-fAmount)*outpitch; } // Add in pitch shift outpitch = outpitch + fShift; // LFO logic tf = fLforate*N/(x->noverlap*fs); if (tf>1) tf=1; x->lfophase = x->lfophase + tf; if (x->lfophase>1) x->lfophase = x->lfophase-1; lfoval = x->lfophase; tf = (fLfosymm + 1)/2; if (tf<=0 || tf>=1) { if (tf<=0) lfoval = 1-lfoval; } else { if (lfoval<=tf) { lfoval = lfoval/tf; } else { lfoval = 1 - (lfoval-tf)/(1-tf); } } if (fLfoshape>=0) { // linear combination of cos and line lfoval = (0.5 - 0.5*cos(lfoval*PI))*fLfoshape + lfoval*(1-fLfoshape); lfoval = fLfoamp*(lfoval*2 - 1); } else { // smoosh the sine horizontally until it's squarish tf = 1 + fLfoshape; if (tf<0.001) { lfoval = (lfoval - 0.5)*2/0.001; } else { lfoval = (lfoval - 0.5)*2/tf; } if (lfoval>1) lfoval = 1; if (lfoval<-1) lfoval = -1; lfoval = fLfoamp*sin(lfoval*PI*0.5); } // add in quantized LFO if (iLfoquant>=1) { outpitch = outpitch + (int)(numNotes*lfoval + numNotes + 0.5) - numNotes; } // Convert back from scale notes to semitones outpitch = outpitch + iScwarp; // output scale rotate implemented here ti = (int)(outpitch/numNotes + 32) - 32; tf = outpitch - ti*numNotes; ti2 = (int)tf; ti3 = ti2 + 1; outpitch = iNote2Pitch[ti3%numNotes] - iNote2Pitch[ti2]; if (ti3>=numNotes) { outpitch = outpitch + 12; } outpitch = 
outpitch*(tf - ti2) + iNote2Pitch[ti2]; outpitch = outpitch + 12*ti; outpitch = outpitch - (iNote2Pitch[iScwarp] - iNote2Pitch[0]); //more scale rotation here // add in unquantized LFO if (iLfoquant<=0) { outpitch = outpitch + lfoval*2; } if (outpitch<-36) outpitch = -48; if (outpitch>24) outpitch = 24; x->outpitch = outpitch; // ---- END Modify pitch in all kinds of ways! ---- // Compute variables for pitch shifter that depend on pitch x->inphinc = aref*pow(2,inpitch/12)/fs; x->outphinc = aref*pow(2,outpitch/12)/fs; x->phincfact = x->outphinc/x->inphinc; } // ************************ // * END Low-Rate Section * // ************************ // ***************** // * Pitch Shifter * // ***************** // Pitch shifter (kind of like a pitch-synchronous version of Fairbanks' technique) // Note: pitch estimate is naturally N/2 samples old x->phasein = x->phasein + x->inphinc; x->phaseout = x->phaseout + x->inphinc*x->phincfact; // If it happens that there are no snippets placed at the output, grab a new snippet! 
/* if (x->cbonorm[((long int)x->cbord + (long int)(N/2*(1 - (float)1 / x->phincfact)))%N] < 0.2) { */ /* post( "help!"); */ /* x->phasein = 1; */ /* x->phaseout = 1; */ /* } */ // When input phase resets, take a snippet from N/2 samples in the past if (x->phasein >= 1) { x->phasein = x->phasein - 1; ti2 = x->cbiwr - (long int)N/2; for (ti=-((long int)N)/2; ti<(long int)N/2; ti++) { x->frag[ti%N] = x->cbi[(ti + ti2)%N]; } } // When output phase resets, put a snippet N/2 samples in the future if (x->phaseout >= 1) { x->fragsize = x->fragsize*2; if (x->fragsize >= N) { x->fragsize = N; } x->phaseout = x->phaseout - 1; ti2 = x->cbord + N/2; ti3 = (long int)(((float)x->fragsize) / x->phincfact); if (ti3>=(long int)N/2) { ti3 = N/2 - 1; } for (ti=-ti3/2; ti<(ti3/2); ti++) { tf = x->hannwindow[(long int)N/2 + ti*(long int)N/ti3]; // 3rd degree polynomial interpolator - based on eqns from Hal Chamberlin's book indd = x->phincfact*ti; ind1 = (int)indd; ind2 = ind1+1; ind3 = ind1+2; ind0 = ind1-1; val0 = x->frag[(ind0+N)%N]; val1 = x->frag[(ind1+N)%N]; val2 = x->frag[(ind2+N)%N]; val3 = x->frag[(ind3+N)%N]; vald = 0; vald = vald - (float)0.166666666667 * val0 * (indd - ind1) * (indd - ind2) * (indd - ind3); vald = vald + (float)0.5 * val1 * (indd - ind0) * (indd - ind2) * (indd - ind3); vald = vald - (float)0.5 * val2 * (indd - ind0) * (indd - ind1) * (indd - ind3); vald = vald + (float)0.166666666667 * val3 * (indd - ind0) * (indd - ind1) * (indd - ind2); x->cbo[(ti + ti2 + N)%N] = x->cbo[(ti + ti2 + N)%N] + vald*tf; } x->fragsize = 0; } x->fragsize++; // Get output signal from buffer tf = x->cbo[x->cbord]; /*// Normalize if (tf>0.5) { tf = (float)1/tf; } else { tf = 1; }*/ //tf = tf*x->cbo[x->cbord]; // read buffer tf = x->cbo[x->cbord]; x->cbo[x->cbord] = 0; // erase for next cycle //x->cbonorm[x->cbord] = 0; x->cbord++; // increment read pointer if (x->cbord >= N) { x->cbord = 0; } // ********************* // * END Pitch Shifter * // ********************* ti4 = (x->cbiwr 
+ 2)%N; if (iFcorr>=1) { // The second part of the formant corrector // This is a post-filter that re-applies the formants, designed // to result in the exact original signal when no pitch // manipulation is performed. // tf is signal input // gotta run it 3 times because of a pesky delay free loop // first time: compute 0-response tf2 = tf; fa = 0; fb = fa; for (ti=0; ti<ford; ti++) { fc = (fb-x->frc[ti])*frlamb + x->frb[ti]; tf = x->fbuff[ti][ti4]; fb = fc - tf*fa; x->ftvec[ti] = tf*fc; fa = fa - x->ftvec[ti]; } tf = -fa; for (ti=ford-1; ti>=0; ti--) { tf = tf + x->ftvec[ti]; } f0resp = tf; // second time: compute 1-response fa = 1; fb = fa; for (ti=0; ti<ford; ti++) { fc = (fb-x->frc[ti])*frlamb + x->frb[ti]; tf = x->fbuff[ti][ti4]; fb = fc - tf*fa; x->ftvec[ti] = tf*fc; fa = fa - x->ftvec[ti]; } tf = -fa; for (ti=ford-1; ti>=0; ti--) { tf = tf + x->ftvec[ti]; } f1resp = tf; // now solve equations for output, based on 0-response and 1-response tf = (float)2*tf2; tf2 = tf; tf = ((float)1 - f1resp + f0resp); if (tf!=0) { tf2 = (tf2 + f0resp) / tf; } else { tf2 = 0; } // third time: update delay registers fa = tf2; fb = fa; for (ti=0; ti<ford; ti++) { fc = (fb-x->frc[ti])*frlamb + x->frb[ti]; x->frc[ti] = fc; x->frb[ti] = fb; tf = x->fbuff[ti][ti4]; fb = fc - tf*fa; fa = fa - tf*fc; } tf = tf2; tf = tf + flpa*x->flp; // lowpass post-emphasis filter x->flp = tf; // Bring up the gain slowly when formant correction goes from disabled // to enabled, while things stabilize. 
if (x->fmute>0.5) { tf = tf*(x->fmute - 0.5)*2; } else { tf = 0; } tf2 = x->fmutealph; x->fmute = (1-tf2) + tf2*x->fmute; // now tf is signal output // ...and we're done messing with formants } else { x->fmute = 0; } // Write audio to output of plugin // Mix (blend between original (delayed) =0 and shifted/corrected =1) *(out++) = fMix*tf + (1-fMix)*x->cbi[ti4]; //*(pfOutput++) = (float) fMix*tf + (1-fMix)*x->cbi[(x->cbiwr - N + 1)%N]; } return (w + 5); // always add one more than the 2nd argument in dsp_add() }
/*
 * autotune_init -- set up every sample-rate dependent field of *x.
 *
 *   x  : object to initialise.  A non-zero x->cbsize or x->noverlap that is
 *        already present is kept; a zero value is replaced by a default.
 *   sr : sample rate, stored in x->fs.
 *
 * Allocates the circular buffers, the formant-corrector state, the two
 * analysis windows, the FFT work buffers and the pitch-shifter fragment
 * buffer, then precomputes x->acwinv (reciprocal of the normalised
 * autocorrelation of x->cbwindow).  Allocation results are not checked.
 */
void autotune_init(t_autotune *x,unsigned long sr) {
  unsigned long i;
  float *lobe;

  x->fs = sr;
  x->aref = 440;
  x->fTune = x->aref;

  /* Only pick a buffer size when none was supplied. */
  if (x->cbsize == 0) {
    x->cbsize = (x->fs >= 88200) ? 4096 : 2048;
  }
  x->corrsize = x->cbsize / 2 + 1;

  /* Period search range in seconds (1/70 s down to 1/2400 s), and the same
     range expressed in samples (nmax is capped at corrsize). */
  x->pmax = 1/(float)70;
  x->pmin = 1/(float)2400;  /* eventually may want to bring these out as sliders */
  x->pperiod = x->pmax;
  x->nmin = (unsigned long)(x->fs * x->pmin);
  x->nmax = (unsigned long)(x->fs * x->pmax);
  if (x->nmax > x->corrsize) {
    x->nmax = x->corrsize;
  }

  /* Circular buffers: raw input, formant-filtered input, output. */
  x->cbi = (float*) calloc(x->cbsize, sizeof(float));
  x->cbf = (float*) calloc(x->cbsize, sizeof(float));
  x->cbo = (float*) calloc(x->cbsize, sizeof(float));
  /* x->cbonorm = (float*) calloc(x->cbsize, sizeof(float)); */
  x->cbord = 0;
  x->cbiwr = 0;

  x->lfophase = 0;

  /* ---- Formant corrector ---- */
  x->ford = 7;  /* should be sufficient to capture formants */
  x->falph = pow(0.001, (float) 80 / (x->fs));
  x->flamb = -(0.8517*sqrt(atan(0.06583*x->fs))-0.1916);  /* or about -0.88 @ 44.1kHz */
  x->fk      = calloc(x->ford, sizeof(float));
  x->fb      = calloc(x->ford, sizeof(float));
  x->fc      = calloc(x->ford, sizeof(float));
  x->frb     = calloc(x->ford, sizeof(float));
  x->frc     = calloc(x->ford, sizeof(float));
  x->fsig    = calloc(x->ford, sizeof(float));
  x->fsmooth = calloc(x->ford, sizeof(float));
  x->fhp = 0;
  x->flp = 0;
  x->flpa = pow(0.001, (float) 10 / (x->fs));
  /* One cbsize-long coefficient history per filter stage. */
  x->fbuff = (float**) malloc((x->ford)*sizeof(float*));
  i = 0;
  while (i < x->ford) {
    x->fbuff[i] = calloc(x->cbsize, sizeof(float));
    i++;
  }
  x->ftvec = calloc(x->ford, sizeof(float));
  x->fmute = 1;
  x->fmutealph = pow(0.001, (float)1 / (x->fs));

  /* ---- Windows ---- */
  /* Standard raised cosine window, max height at N/2 */
  x->hannwindow = (float*) calloc(x->cbsize, sizeof(float));
  for (i = 0; i < x->cbsize; i++) {
    x->hannwindow[i] = -0.5*cos(2*PI*i/(x->cbsize - 1)) + 0.5;
  }

  /* A single raised cosine occupying N/4 .. 3N/4; the rest stays zero. */
  x->cbwindow = (float*) calloc(x->cbsize, sizeof(float));
  lobe = x->cbwindow + x->cbsize/4;
  for (i = 0; i < (x->cbsize / 2); i++) {
    lobe[i] = -0.5*cos(4*PI*i/(x->cbsize - 1)) + 0.5;
  }

  if (x->noverlap == 0) {
    x->noverlap = 4;
  }
  /* fprintf(stderr,"%d %d\n", x->cbsize, x->noverlap); */

  /* ---- FFT state and work buffers ---- */
  x->fx = fft_con(x->cbsize);
  x->ffttime   = (float*) calloc(x->cbsize, sizeof(float));
  x->fftfreqre = (float*) calloc(x->corrsize, sizeof(float));
  x->fftfreqim = (float*) calloc(x->corrsize, sizeof(float));

  /* ---- Autocorrelation of the analysis window ---- */
  x->acwinv = (float*) calloc(x->cbsize, sizeof(float));
  for (i = 0; i < x->cbsize; i++) {
    x->ffttime[i] = x->cbwindow[i];
  }
  /* The transform input is cbwindow itself (ffttime holds the same data). */
  fft_forward(x->fx, x->cbwindow, x->fftfreqre, x->fftfreqim);
  /* Squared magnitude with zero phase. */
  for (i = 0; i < x->corrsize; i++) {
    const float re = x->fftfreqre[i];
    const float im = x->fftfreqim[i];
    x->fftfreqre[i] = re*re + im*im;
    x->fftfreqim[i] = 0;
  }
  fft_inverse(x->fx, x->fftfreqre, x->fftfreqim, x->ffttime);
  /* Store the reciprocal of the lag-0-normalised autocorrelation; lags whose
     normalised value is not above 1e-6 get 0 instead of a huge reciprocal. */
  x->acwinv[0] = 1;
  for (i = 1; i < x->cbsize; i++) {
    const float ratio = x->ffttime[i]/x->ffttime[0];
    x->acwinv[i] = (ratio > 0.000001) ? (float)1/ratio : 0;
  }
  /* ---- END autocorrelation of window ---- */

  x->lrshift = 0;
  x->ptarget = 0;
  x->sptarget = 0;
  /* x->sptarget = 0; */
  /* x->wasvoiced = 0; */
  /* x->persistamt = 0; */
  /* x->glidepersist = 100; // 100 ms glide persist */
  x->vthresh = 0.7;  /* The voiced confidence (unbiased peak) threshold level */

  /* ---- Pitch shifter ---- */
  x->phprdd = 0.01;  /* Default period */
  /* x->phprd = x->phprdd; */
  x->inphinc = (float)1/(x->phprdd * x->fs);
  x->phincfact = 1;
  x->phasein = 0;
  x->phaseout = 0;
  x->frag = (float*) calloc(x->cbsize, sizeof(float));
  x->fragsize = 0;
}
// Called every time we get a new chunk of audio.
//
// Processes SampleCount samples from Instance->m_pfInputBuffer1 into
// Instance->m_pfOutputBuffer1. For each sample the loop:
//   1. stores the input in the circular buffer cbi and, when formant
//      correction is on (iFcorr>=1), runs the adaptive pre-filter and stores
//      its output in cbf (otherwise cbf gets the raw sample);
//   2. every N/noverlap samples runs the "low-rate section": FFT-based
//      autocorrelation, peak picking for pitch period and confidence, pitch
//      modification (pull, scale snapping, shift, LFO), and recomputation of
//      the pitch-shifter phase increments;
//   3. runs the pitch shifter, which grabs fragments from cbf and
//      overlap-adds resampled, Hann-windowed copies into cbo;
//   4. optionally re-applies formants with the post-filter and writes the
//      dry/wet mix to the output.
// After the loop, the value N-1 is written to *m_pfLatency.
//
// State carried between calls lives in *Instance (circular buffers, filter
// registers, phases, inpitch/conf/outpitch).
void runAutotalent(Autotalent * Instance, unsigned long SampleCount) {
  // some kind of buffer, need to find out the type, looks like floats
  float* pfInput;
  float* pfOutput;
  float fAmount;
  float fSmooth;
  int iNotes[12];       // per-semitone flag, index 0 = A; <0 means "not in scale"
  int iPitch2Note[12];  // semitone index -> scale-note index (-1 if not in scale)
  int iNote2Pitch[12];  // scale-note index -> semitone index (-1 past numNotes)
  int numNotes;
  float fTune;
  float fFixed;
  float fPull;
  float fShift;
  int iScwarp;
  float fLfoamp;
  float fLforate;
  float fLfoshape;
  float fLfosymm;
  int iLfoquant;
  int iFcorr;
  float fFwarp;
  float fMix;
  Autotalent* psAutotalent;
  unsigned long lSampleIndex;
  long int N;   // circular buffer size (cbsize)
  long int Nf;  // number of frequency bins (corrsize)
  long int fs;
  float pmin;
  float pmax;
  unsigned long nmin;
  unsigned long nmax;
  // Scratch integers/floats; they are reused for unrelated purposes throughout
  // the function, so statement order matters.
  long int ti;
  long int ti2;
  long int ti3;
  long int ti4;
  float tf;
  float tf2;
  // Variables for cubic spline interpolator
  float indd;
  int ind0;
  int ind1;
  int ind2;
  int ind3;
  float vald;
  float val0;
  float val1;
  float val2;
  float val3;
  int lowersnap;
  int uppersnap;
  float lfoval;
  float pperiod;
  float inpitch;
  float conf;
  float outpitch;
  float aref;
  float fa;
  float fb;
  float fc;
  float fk;
  float flamb;
  float frlamb;
  float falph;
  float foma;
  float f1resp;
  float f0resp;
  float flpa;
  int ford;

  psAutotalent = (Autotalent *)Instance;
  pfInput = psAutotalent->m_pfInputBuffer1;
  pfOutput = psAutotalent->m_pfOutputBuffer1;
  fAmount = (float) *(psAutotalent->m_pfAmount);
  fSmooth = (float) *(psAutotalent->m_pfSmooth) * 0.8; // Scales max to a more reasonable value
  fTune = (float) *(psAutotalent->m_pfTune);
  iNotes[0] = psAutotalent->m_pfKey[AT_A];
  iNotes[1] = psAutotalent->m_pfKey[AT_Bb];
  iNotes[2] = psAutotalent->m_pfKey[AT_B];
  iNotes[3] = psAutotalent->m_pfKey[AT_C];
  iNotes[4] = psAutotalent->m_pfKey[AT_Db];
  iNotes[5] = psAutotalent->m_pfKey[AT_D];
  iNotes[6] = psAutotalent->m_pfKey[AT_Eb];
  iNotes[7] = psAutotalent->m_pfKey[AT_E];
  iNotes[8] = psAutotalent->m_pfKey[AT_F];
  iNotes[9] = psAutotalent->m_pfKey[AT_Gb];
  iNotes[10] = psAutotalent->m_pfKey[AT_G];
  iNotes[11] = psAutotalent->m_pfKey[AT_Ab];
  fFixed = (float) *(psAutotalent->m_pfFixed);
  fPull = (float) *(psAutotalent->m_pfPull);
  fShift = (float) *(psAutotalent->m_pfShift);
  iScwarp = (int) *(psAutotalent->m_pfScwarp);
  fLfoamp = (float) *(psAutotalent->m_pfLfoamp);
  fLforate = (float) *(psAutotalent->m_pfLforate);
  fLfoshape = (float) *(psAutotalent->m_pfLfoshape);
  fLfosymm = (float) *(psAutotalent->m_pfLfosymm);
  iLfoquant = (int) *(psAutotalent->m_pfLfoquant);
  iFcorr = (int) *(psAutotalent->m_pfFcorr);
  fFwarp = (float) *(psAutotalent->m_pfFwarp);
  fMix = (float) *(psAutotalent->m_pfMix);

  // Some logic for the semitone->scale and scale->semitone conversion
  // If no notes are selected as being in the scale, instead snap to all notes
  ti2 = 0;
  for (ti=0; ti<12; ti++) {
    if (iNotes[ti]>=0) {
      iPitch2Note[ti] = ti2;
      iNote2Pitch[ti2] = ti;
      ti2 = ti2 + 1;
    }
    else {
      iPitch2Note[ti] = -1;
    }
  }
  numNotes = ti2;
  while (ti2<12) {
    iNote2Pitch[ti2] = -1;
    ti2 = ti2 + 1;
  }
  if (numNotes==0) {
    for (ti=0; ti<12; ti++) {
      iNotes[ti] = 1;
      iPitch2Note[ti] = ti;
      iNote2Pitch[ti] = ti;
    }
    numNotes = 12;
  }
  // Adding numNotes*5 before the modulo keeps the result non-negative for
  // iScwarp >= -5*numNotes.
  iScwarp = (iScwarp + numNotes*5)%numNotes;

  ford = psAutotalent->ford;
  falph = psAutotalent->falph;
  foma = (float)1 - falph;
  flpa = psAutotalent->flpa;
  flamb = psAutotalent->flamb;
  // frlamb is the post-filter counterpart of flamb, derived from fFwarp.
  tf = pow((float)2,fFwarp/2)*(1+flamb)/(1-flamb);
  frlamb = (tf - 1)/(tf + 1);

  psAutotalent->aref = (float)fTune;

  N = psAutotalent->cbsize;
  Nf = psAutotalent->corrsize;
  fs = psAutotalent->fs;

  pmax = psAutotalent->pmax;
  pmin = psAutotalent->pmin;
  nmax = psAutotalent->nmax;
  nmin = psAutotalent->nmin;

  aref = psAutotalent->aref;
  pperiod = psAutotalent->pmax;
  // These three are read from persistent state; they must have been given a
  // value before the first call (the low-rate section below only runs every
  // N/noverlap samples).
  inpitch = psAutotalent->inpitch;
  conf = psAutotalent->conf;
  outpitch = psAutotalent->outpitch;

  /*******************
   *  MAIN DSP LOOP  *
   *******************/
  for (lSampleIndex = 0; lSampleIndex < SampleCount; lSampleIndex++) {

    // load data into circular buffer
    tf = (float) *(pfInput++);
    ti4 = psAutotalent->cbiwr;
    psAutotalent->cbi[ti4] = tf;

    if (iFcorr>=1) {
      // Somewhat experimental formant corrector
      //  formants are removed using an adaptive pre-filter and
      //  re-introduced after pitch manipulation using post-filter
      // tf is signal input
      fa = tf - psAutotalent->fhp; // highpass pre-emphasis filter
      psAutotalent->fhp = tf;
      fb = fa;
      for (ti=0; ti<ford; ti++) {
        psAutotalent->fsig[ti] = fa*fa*foma + psAutotalent->fsig[ti]*falph;
        fc = (fb-psAutotalent->fc[ti])*flamb + psAutotalent->fb[ti];
        psAutotalent->fc[ti] = fc;
        psAutotalent->fb[ti] = fb;
        fk = fa*fc*foma + psAutotalent->fk[ti]*falph;
        psAutotalent->fk[ti] = fk;
        // The small constant keeps the division finite when fsig is 0.
        tf = fk/(psAutotalent->fsig[ti] + 0.000001);
        tf = tf*foma + psAutotalent->fsmooth[ti]*falph;
        psAutotalent->fsmooth[ti] = tf;
        // Remember this stage's coefficient at the write position so the
        // post-filter can read it back later from fbuff.
        psAutotalent->fbuff[ti][ti4] = tf;
        fb = fc - tf*fa;
        fa = fa - tf*fc;
      }
      psAutotalent->cbf[ti4] = fa;
      // Now hopefully the formants are reduced
      // More formant correction code at the end of the DSP loop
    }
    else {
      psAutotalent->cbf[ti4] = tf;
    }

    // Input write pointer logic
    psAutotalent->cbiwr++;
    if (psAutotalent->cbiwr >= N) {
      psAutotalent->cbiwr = 0;
    }

    // ********************
    // * Low-rate section *
    // ********************

    // Every N/noverlap samples, run pitch estimation / manipulation code
    if ((psAutotalent->cbiwr)%(N/psAutotalent->noverlap) == 0) {

      // ---- Obtain autocovariance ----

      // Window and fill FFT buffer
      // (index ti counts backwards in time from the write pointer)
      ti2 = psAutotalent->cbiwr;
      for (ti=0; ti<N; ti++) {
        psAutotalent->ffttime[ti] = (float)(psAutotalent->cbi[(ti2-ti+N)%N]*psAutotalent->cbwindow[ti]);
      }

      // Calculate FFT
      fft_forward(psAutotalent->fmembvars, psAutotalent->ffttime, psAutotalent->fftfreqre, psAutotalent->fftfreqim);

      // Remove DC
      psAutotalent->fftfreqre[0] = 0;
      psAutotalent->fftfreqim[0] = 0;

      // Take magnitude squared
      for (ti=1; ti<Nf; ti++) {
        psAutotalent->fftfreqre[ti] = (psAutotalent->fftfreqre[ti])*(psAutotalent->fftfreqre[ti]) + (psAutotalent->fftfreqim[ti])*(psAutotalent->fftfreqim[ti]);
        psAutotalent->fftfreqim[ti] = 0;
      }

      // Calculate IFFT
      fft_inverse(psAutotalent->fmembvars, psAutotalent->fftfreqre, psAutotalent->fftfreqim, psAutotalent->ffttime);

      // Normalize
      // NOTE(review): ffttime[0] is not checked for zero before the division
      // (e.g. an all-zero input buffer) -- confirm this is acceptable.
      tf = (float)1/psAutotalent->ffttime[0];
      for (ti=1; ti<N; ti++) {
        psAutotalent->ffttime[ti] = psAutotalent->ffttime[ti] * tf;
      }
      psAutotalent->ffttime[0] = 1;

      //  ---- END Obtain autocovariance ----


      //  ---- Calculate pitch and confidence ----

      // Calculate pitch period
      //   Pitch period is determined by the location of the max (biased)
      //     peak within a given range
      //   Confidence is determined by the corresponding unbiased height
      tf2 = 0;
      pperiod = pmin;
      // ti4 is reused here as the index of the best peak; it is recomputed
      // after the pitch shifter, before it is used as a buffer index again.
      for (ti=nmin; ti<nmax; ti++) {
        ti2 = ti-1;
        ti3 = ti+1;
        if (ti2<0) {
          ti2 = 0;
        }
        if (ti3>Nf) {
          ti3 = Nf;
        }
        tf = psAutotalent->ffttime[ti];

        // local maximum that also beats the best peak so far
        if (tf>psAutotalent->ffttime[ti2] && tf>=psAutotalent->ffttime[ti3] && tf>tf2) {
          tf2 = tf;
          ti4 = ti;
        }
      }
      // When no peak is found (tf2 stays 0), conf keeps its previous value and
      // pperiod stays at pmin.
      if (tf2>0) {
        conf = tf2*psAutotalent->acwinv[ti4];
        if (ti4>0 && ti4<Nf) {
          // Find the center of mass in the vicinity of the detected peak
          tf = psAutotalent->ffttime[ti4-1]*(ti4-1);
          tf = tf + psAutotalent->ffttime[ti4]*(ti4);
          tf = tf + psAutotalent->ffttime[ti4+1]*(ti4+1);
          tf = tf/(psAutotalent->ffttime[ti4-1] + psAutotalent->ffttime[ti4] + psAutotalent->ffttime[ti4+1]);
          pperiod = tf/fs;
        }
        else {
          pperiod = (float)ti4/fs;
        }
      }

      // Convert to semitones
      tf = (float) -12*log10((float)aref*pperiod)*L2SC;
      if (conf>=psAutotalent->vthresh) {
        inpitch = tf;
        psAutotalent->inpitch = tf; // update pitch only if voiced
      }
      psAutotalent->conf = conf;

      *(psAutotalent->m_pfPitch) = inpitch;
      *(psAutotalent->m_pfConf) = conf;

      //  ---- END Calculate pitch and confidence ----


      //  ---- Modify pitch in all kinds of ways! ----

      outpitch = inpitch;

      // Pull to fixed pitch
      outpitch = (1-fPull)*outpitch + fPull*fFixed;

      // -- Convert from semitones to scale notes --
      // The +32 / -32 makes the int truncation act like a floor for
      // arguments above -32.
      ti = (int)(outpitch/12 + 32) - 32; // octave
      tf = outpitch - ti*12; // semitone in octave
      ti2 = (int)tf;
      ti3 = ti2 + 1;
      // a little bit of pitch correction logic, since it's a convenient place for it
      if (iNotes[ti2%12]<0 || iNotes[ti3%12]<0) { // if between 2 notes that are more than a semitone apart
        lowersnap = 1;
        uppersnap = 1;
      }
      else {
        lowersnap = 0;
        uppersnap = 0;
        if (iNotes[ti2%12]==1) { // if specified by user
          lowersnap = 1;
        }
        if (iNotes[ti3%12]==1) { // if specified by user
          uppersnap = 1;
        }
      }
      // (back to the semitone->scale conversion)
      // finding next lower pitch in scale
      while (iNotes[(ti2+12)%12]<0) {
        ti2 = ti2 - 1;
      }
      // finding next higher pitch in scale
      while (iNotes[ti3%12]<0) {
        ti3 = ti3 + 1;
      }
      tf = (tf-ti2)/(ti3-ti2) + iPitch2Note[(ti2+12)%12];
      if (ti2<0) {
        tf = tf - numNotes;
      }
      outpitch = tf + numNotes*ti;
      // -- Done converting to scale notes --

      // The actual pitch correction
      ti = (int)(outpitch+128) - 128;
      tf = outpitch - ti - 0.5;
      ti2 = ti3-ti2;
      if (ti2>2) { // if more than 2 semitones apart, put a 2-semitone-like transition halfway between
        tf2 = (float)ti2/2;
      }
      else {
        tf2 = (float)1;
      }
      // fSmooth is floored at 0.001 to avoid dividing by (near) zero.
      if (fSmooth<0.001) {
        tf2 = tf*tf2/0.001;
      }
      else {
        tf2 = tf*tf2/fSmooth;
      }
      if (tf2<-0.5) tf2 = -0.5;
      if (tf2>0.5) tf2 = 0.5;
      tf2 = 0.5*sin(PI*tf2) + 0.5; // jumping between notes using horizontally-scaled sine segment
      tf2 = tf2 + ti;
      if ( (tf<0.5 && lowersnap) || (tf>=0.5 && uppersnap) ) {
        outpitch = fAmount*tf2 + ((float)1-fAmount)*outpitch;
      }

      // Add in pitch shift
      outpitch = outpitch + fShift;

      // LFO logic
      // Phase advance per low-rate step, capped at one full cycle.
      tf = fLforate*N/(psAutotalent->noverlap*fs);
      if (tf>1) tf=1;
      psAutotalent->lfophase = psAutotalent->lfophase + tf;
      if (psAutotalent->lfophase>1) psAutotalent->lfophase = psAutotalent->lfophase-1;
      lfoval = psAutotalent->lfophase;
      tf = (fLfosymm + 1)/2;
      if (tf<=0 || tf>=1) {
        if (tf<=0) {
          lfoval = 1-lfoval;
        }
      }
      else {
        if (lfoval<=tf) {
          lfoval = lfoval/tf;
        }
        else {
          lfoval = 1 - (lfoval-tf)/(1-tf);
        }
      }
      if (fLfoshape>=0) {
        // linear combination of cos and line
        lfoval = (0.5 - 0.5*cos(lfoval*PI))*fLfoshape + lfoval*(1-fLfoshape);
        lfoval = fLfoamp*(lfoval*2 - 1);
      }
      else {
        // smoosh the sine horizontally until it's squarish
        tf = 1 + fLfoshape;
        if (tf<0.001) {
          lfoval = (lfoval - 0.5)*2/0.001;
        }
        else {
          lfoval = (lfoval - 0.5)*2/tf;
        }
        if (lfoval>1) lfoval = 1;
        if (lfoval<-1) lfoval = -1;
        lfoval = fLfoamp*sin(lfoval*PI*0.5);
      }
      // add in quantized LFO
      if (iLfoquant>=1) {
        outpitch = outpitch + (int)(numNotes*lfoval + numNotes + 0.5) - numNotes;
      }


      // Convert back from scale notes to semitones
      outpitch = outpitch + iScwarp; // output scale rotate implemented here
      ti = (int)(outpitch/numNotes + 32) - 32;
      tf = outpitch - ti*numNotes;
      ti2 = (int)tf;
      ti3 = ti2 + 1;
      outpitch = iNote2Pitch[ti3%numNotes] - iNote2Pitch[ti2];
      if (ti3>=numNotes) {
        outpitch = outpitch + 12;
      }
      outpitch = outpitch*(tf - ti2) + iNote2Pitch[ti2];
      outpitch = outpitch + 12*ti;
      outpitch = outpitch - (iNote2Pitch[iScwarp] - iNote2Pitch[0]); //more scale rotation here

      // add in unquantized LFO
      if (iLfoquant<=0) {
        outpitch = outpitch + lfoval*2;
      }

      // NOTE(review): the lower limit is asymmetric -- values below -36 are
      // set to -48, not -36. Confirm this is intended.
      if (outpitch<-36) outpitch = -48;
      if (outpitch>24) outpitch = 24;

      psAutotalent->outpitch = outpitch;

      //  ---- END Modify pitch in all kinds of ways! ----

      // Compute variables for pitch shifter that depend on pitch
      psAutotalent->inphinc = aref*pow(2,inpitch/12)/fs;
      psAutotalent->outphinc = aref*pow(2,outpitch/12)/fs;
      psAutotalent->phincfact = psAutotalent->outphinc/psAutotalent->inphinc;
    }
    // ************************
    // * END Low-Rate Section *
    // ************************



    // *****************
    // * Pitch Shifter *
    // *****************

    // Pitch shifter (kind of like a pitch-synchronous version of Fairbanks' technique)
    //   Note: pitch estimate is naturally N/2 samples old
    // outphinc is only written by the low-rate section above, so it is read
    // here from persistent state until that section has run once.
    psAutotalent->phasein = psAutotalent->phasein + psAutotalent->inphinc;
    psAutotalent->phaseout = psAutotalent->phaseout + psAutotalent->outphinc;

    //   When input phase resets, take a snippet from N/2 samples in the past
    if (psAutotalent->phasein >= 1) {
      psAutotalent->phasein = psAutotalent->phasein - 1;
      ti2 = psAutotalent->cbiwr - N/2;
      for (ti=-N/2; ti<N/2; ti++) {
        psAutotalent->frag[(ti+N)%N] = psAutotalent->cbf[(ti + ti2 + N)%N];
      }
    }

    //   When output phase resets, put a snippet N/2 samples in the future
    if (psAutotalent->phaseout >= 1) {
      psAutotalent->fragsize = psAutotalent->fragsize*2;
      if (psAutotalent->fragsize > N) {
        psAutotalent->fragsize = N;
      }
      psAutotalent->phaseout = psAutotalent->phaseout - 1;
      ti2 = psAutotalent->cbord + N/2;
      ti3 = (long int)(((float)psAutotalent->fragsize) / psAutotalent->phincfact);
      if (ti3>=N/2) {
        ti3 = N/2 - 1;
      }
      // When ti3 is 0 or 1 the loop bounds are both 0, so the body (and its
      // division by ti3) is not executed.
      for (ti=-ti3/2; ti<(ti3/2); ti++) {
        tf = psAutotalent->hannwindow[(long int)N/2 + ti*(long int)N/ti3];
        // 3rd degree polynomial interpolator - based on eqns from Hal Chamberlin's book
        indd = psAutotalent->phincfact*ti;
        ind1 = (int)indd;
        ind2 = ind1+1;
        ind3 = ind1+2;
        ind0 = ind1-1;
        val0 = psAutotalent->frag[(ind0+N)%N];
        val1 = psAutotalent->frag[(ind1+N)%N];
        val2 = psAutotalent->frag[(ind2+N)%N];
        val3 = psAutotalent->frag[(ind3+N)%N];
        vald = 0;
        vald = vald - (float)0.166666666667 * val0 * (indd - ind1) * (indd - ind2) * (indd - ind3);
        vald = vald + (float)0.5 * val1 * (indd - ind0) * (indd - ind2) * (indd - ind3);
        vald = vald - (float)0.5 * val2 * (indd - ind0) * (indd - ind1) * (indd - ind3);
        vald = vald + (float)0.166666666667 * val3 * (indd - ind0) * (indd - ind1) * (indd - ind2);
        // overlap-add the windowed, interpolated sample into the output buffer
        psAutotalent->cbo[(ti + ti2 + N)%N] = psAutotalent->cbo[(ti + ti2 + N)%N] + vald*tf;
      }
      psAutotalent->fragsize = 0;
    }
    psAutotalent->fragsize++;

    //   Get output signal from buffer
    tf = psAutotalent->cbo[psAutotalent->cbord]; // read buffer

    psAutotalent->cbo[psAutotalent->cbord] = 0; // erase for next cycle
    psAutotalent->cbord++; // increment read pointer
    if (psAutotalent->cbord >= N) {
      psAutotalent->cbord = 0;
    }

    // *********************
    // * END Pitch Shifter *
    // *********************

    // Buffer position used for the post-filter coefficients (fbuff) and for
    // the dry signal (cbi) in the final mix.
    ti4 = (psAutotalent->cbiwr + 2)%N;
    if (iFcorr>=1) {
      // The second part of the formant corrector
      // This is a post-filter that re-applies the formants, designed
      //   to result in the exact original signal when no pitch
      //   manipulation is performed.
      // tf is signal input
      // gotta run it 3 times because of a pesky delay free loop
      //  first time: compute 0-response
      tf2 = tf;
      fa = 0;
      fb = fa;
      for (ti=0; ti<ford; ti++) {
        fc = (fb-psAutotalent->frc[ti])*frlamb + psAutotalent->frb[ti];
        tf = psAutotalent->fbuff[ti][ti4];
        fb = fc - tf*fa;
        psAutotalent->ftvec[ti] = tf*fc;
        fa = fa - psAutotalent->ftvec[ti];
      }
      tf = -fa;
      for (ti=ford-1; ti>=0; ti--) {
        tf = tf + psAutotalent->ftvec[ti];
      }
      f0resp = tf;
      //  second time: compute 1-response
      fa = 1;
      fb = fa;
      for (ti=0; ti<ford; ti++) {
        fc = (fb-psAutotalent->frc[ti])*frlamb + psAutotalent->frb[ti];
        tf = psAutotalent->fbuff[ti][ti4];
        fb = fc - tf*fa;
        psAutotalent->ftvec[ti] = tf*fc;
        fa = fa - psAutotalent->ftvec[ti];
      }
      tf = -fa;
      for (ti=ford-1; ti>=0; ti--) {
        tf = tf + psAutotalent->ftvec[ti];
      }
      f1resp = tf;
      //  now solve equations for output, based on 0-response and 1-response
      tf = (float)2*tf2;
      tf2 = tf;
      tf = ((float)1 - f1resp + f0resp);
      if (tf!=0) {
        tf2 = (tf2 + f0resp) / tf;
      }
      else {
        tf2 = 0;
      }
      //  third time: update delay registers
      fa = tf2;
      fb = fa;
      for (ti=0; ti<ford; ti++) {
        fc = (fb-psAutotalent->frc[ti])*frlamb + psAutotalent->frb[ti];
        psAutotalent->frc[ti] = fc;
        psAutotalent->frb[ti] = fb;
        tf = psAutotalent->fbuff[ti][ti4];
        fb = fc - tf*fa;
        fa = fa - tf*fc;
      }
      tf = tf2;
      tf = tf + flpa*psAutotalent->flp;  // lowpass post-emphasis filter
      psAutotalent->flp = tf;
      // Bring up the gain slowly when formant correction goes from disabled
      // to enabled, while things stabilize.
      if (psAutotalent->fmute>0.5) {
        tf = tf*(psAutotalent->fmute - 0.5)*2;
      }
      else {
        tf = 0;
      }
      // fmute moves towards 1 by the factor fmutealph each sample.
      tf2 = psAutotalent->fmutealph;
      psAutotalent->fmute = (1-tf2) + tf2*psAutotalent->fmute;
      // now tf is signal output
      // ...and we're done messing with formants
    }
    else {
      // Reset so the gain ramps up from zero the next time correction is enabled.
      psAutotalent->fmute = 0;
    }

    // Write audio to output of plugin
    // Mix (blend between original (delayed) =0 and processed =1)
    *(pfOutput++) = fMix*tf + (1-fMix)*psAutotalent->cbi[ti4];

  }

  // Tell the host the algorithm latency
  *(psAutotalent->m_pfLatency) = (N-1);
}
Autotalent * instantiateAutotalent(unsigned long SampleRate) { unsigned long ti; Autotalent* membvars = malloc(sizeof(Autotalent)); membvars->aref = 440; membvars->fs = SampleRate; if (SampleRate>=88200) { membvars->cbsize = 4096; } else { membvars->cbsize = 2048; } membvars->corrsize = membvars->cbsize / 2 + 1; membvars->pmax = 1/(float)70; // max and min periods (ms) membvars->pmin = 1/(float)700; // eventually may want to bring these out as sliders membvars->nmax = (unsigned long)(SampleRate * membvars->pmax); if (membvars->nmax > membvars->corrsize) { membvars->nmax = membvars->corrsize; } membvars->nmin = (unsigned long)(SampleRate * membvars->pmin); membvars->cbi = calloc(membvars->cbsize, sizeof(float)); membvars->cbf = calloc(membvars->cbsize, sizeof(float)); membvars->cbo = calloc(membvars->cbsize, sizeof(float)); membvars->cbiwr = 0; membvars->cbord = 0; membvars->lfophase = 0; // Initialize formant corrector membvars->ford = 7; // should be sufficient to capture formants membvars->falph = pow(0.001, (float) 80 / (SampleRate)); membvars->flamb = -(0.8517*sqrt(atan(0.06583*SampleRate))-0.1916); // or about -0.88 @ 44.1kHz membvars->fk = calloc(membvars->ford, sizeof(float)); membvars->fb = calloc(membvars->ford, sizeof(float)); membvars->fc = calloc(membvars->ford, sizeof(float)); membvars->frb = calloc(membvars->ford, sizeof(float)); membvars->frc = calloc(membvars->ford, sizeof(float)); membvars->fsig = calloc(membvars->ford, sizeof(float)); membvars->fsmooth = calloc(membvars->ford, sizeof(float)); membvars->fhp = 0; membvars->flp = 0; membvars->flpa = pow(0.001, (float) 10 / (SampleRate)); membvars->fbuff = (float**) malloc((membvars->ford)*sizeof(float*)); for (ti=0; ti<membvars->ford; ti++) { membvars->fbuff[ti] = calloc(membvars->cbsize, sizeof(float)); } membvars->ftvec = calloc(membvars->ford, sizeof(float)); membvars->fmute = 1; membvars->fmutealph = pow(0.001, (float)1 / (SampleRate)); // Standard raised cosine window, max height at N/2 
membvars->hannwindow = calloc(membvars->cbsize, sizeof(float)); for (ti=0; ti<membvars->cbsize; ti++) { membvars->hannwindow[ti] = -0.5*cos(2*PI*ti/membvars->cbsize) + 0.5; } // Generate a window with a single raised cosine from N/4 to 3N/4 membvars->cbwindow = calloc(membvars->cbsize, sizeof(float)); for (ti=0; ti<(membvars->cbsize / 2); ti++) { membvars->cbwindow[ti+membvars->cbsize/4] = -0.5*cos(4*PI*ti/(membvars->cbsize - 1)) + 0.5; } membvars->noverlap = 4; membvars->fmembvars = fft_con(membvars->cbsize); membvars->ffttime = calloc(membvars->cbsize, sizeof(float)); membvars->fftfreqre = calloc(membvars->corrsize, sizeof(float)); membvars->fftfreqim = calloc(membvars->corrsize, sizeof(float)); // ---- Calculate autocorrelation of window ---- membvars->acwinv = calloc(membvars->cbsize, sizeof(float)); for (ti=0; ti<membvars->cbsize; ti++) { membvars->ffttime[ti] = membvars->cbwindow[ti]; } fft_forward(membvars->fmembvars, membvars->cbwindow, membvars->fftfreqre, membvars->fftfreqim); for (ti=0; ti<membvars->corrsize; ti++) { membvars->fftfreqre[ti] = (membvars->fftfreqre[ti])*(membvars->fftfreqre[ti]) + (membvars->fftfreqim[ti])*(membvars->fftfreqim[ti]); membvars->fftfreqim[ti] = 0; } fft_inverse(membvars->fmembvars, membvars->fftfreqre, membvars->fftfreqim, membvars->ffttime); for (ti=1; ti<membvars->cbsize; ti++) { membvars->acwinv[ti] = membvars->ffttime[ti]/membvars->ffttime[0]; if (membvars->acwinv[ti] > 0.000001) { membvars->acwinv[ti] = (float)1/membvars->acwinv[ti]; } else { membvars->acwinv[ti] = 0; } } membvars->acwinv[0] = 1; // ---- END Calculate autocorrelation of window ---- membvars->lrshift = 0; membvars->ptarget = 0; membvars->sptarget = 0; membvars->vthresh = 0.7; // The voiced confidence (unbiased peak) threshold level // Pitch shifter initialization membvars->phprdd = 0.01; // Default period membvars->inphinc = (float)1/(membvars->phprdd * SampleRate); membvars->phincfact = 1; membvars->phasein = 0; membvars->phaseout = 0; membvars->frag = 
calloc(membvars->cbsize, sizeof(float)); membvars->fragsize = 0; return membvars; }
void process(char* ims_name, char* imd_name, char* filter, int d0, int n, int w, int u0, int v0) { /* Selection du filtre */ /*float (*function_pointer) (double, double, double, double, int, int, int); if(strcmp(filter,"lp") == 0){ printf("low pass filter\n"); function_pointer = lp; } else if(strcmp(filter,"hp") == 0){ printf("high pass filter\n"); function_pointer = hp; } else if(strcmp(filter,"br") == 0){ printf("band reject filter\n"); function_pointer = br; } else if(strcmp(filter,"bp") == 0){ printf("band pass filter\n"); function_pointer = br; } else if(strcmp(filter,"no") == 0){ printf("rejet d'encoche \n"); function_pointer = no; } else { printf("unknown filter,\n filters avalaible: lp, hp, br, bp, no"); assert(false); }*/ pnm ims = pnm_load(ims_name); int width = pnm_get_width(ims); int height = pnm_get_height(ims); pnm imd = pnm_new(width, height, PnmRawPpm); unsigned short * image = (unsigned short *) malloc(height * width * sizeof(unsigned short)); fftw_complex * freq_repr = (fftw_complex *) fftw_malloc(height* width * sizeof(fftw_complex)); image = pnm_get_channel(ims, image, PnmRed); freq_repr = fft_forward(height, width, image); float * as = (float *) malloc(sizeof(float) * height * width); float * ps = (float *) malloc(sizeof(float) * height * width); fft_fr_to_spectra(width, height, freq_repr, as, ps); pnm imd2 = pnm_new(width, height, PnmRawPpm); for(int y = 0; y < height; y++) for(int x = 0; x < width; x++) for(int z = 0; z < 3; z++) pnm_set_component(imd2,y,x,z,as[x+y*height]); pnm_save(imd2, PnmRawPpm, "toto.ppm"); //float d_u_v; for(int j=0; j<height; j++){ for(int i=0; i<width; i++){ //d_u_v = sqrt((float)(j-height/2)*(j-height/2)+(float)(i-width/2)*(i-width/2)); //as[i+j*width] = function_pointer(i, j, u0, v0, n, w, d0) * as[i+j*width]; as[i+j*width] = low_pass(d0, n, d((float) i-width/2,(float) j-height/2)) * as[i+j*width]; //printf("%f \n", hp(i, j, u0, v0, n, w, d0)); //printf("%d \n", function_pointer(i, j, u0, v0, n, w, d0)); } } pnm 
imd3 = pnm_new(width, height, PnmRawPpm); for(int y = 0; y < height; y++) for(int x = 0; x < width; x++) for(int z = 0; z < 3; z++) pnm_set_component(imd3,y,x,z,as[x+y*height]); pnm_save(imd3, PnmRawPpm, "toto2.ppm"); fft_spectra_to_fr(height,width,as,ps, freq_repr); image = fft_backward(height, width, freq_repr); for(int y = 0; y < height; y++) for(int x = 0; x < width; x++) for(int z = 0; z < 3; z++) pnm_set_component(imd,y,x,z,image[x+y*height]); pnm_save(imd, PnmRawPpm, imd_name); free(image); }