/*
 * Convert a Cepstrum into a Spectrum with the same number of samples (my nx).
 *
 * Steps, as performed below:
 *   1. Take the square root of every value in row 1 of my z (all values except the
 *      first are additionally doubled) and put them at the start of a real buffer
 *      of 2 * nx - 2 samples; the rest of the buffer is left untouched.
 *   2. Forward-FFT that buffer.
 *   3. Store the magnitude of each FFT bin in row 1 of the Spectrum.
 *   4. Map each magnitude x to exp (ln10 * x / 20) * 2e-5 / sqrt (2 * thy dx) and
 *      set row 2 to zero.
 *      NOTE(review): this looks like a dB (re 2e-5 Pa) to linear-amplitude conversion
 *      producing a zero-phase spectrum -- confirm against the Spectrum conventions.
 *
 * Throws (via Melder_throw) if the Cepstrum has fewer than 2 samples or if any of
 * the called routines fails; the message is then completed with ": no Spectrum created."
 */
static autoSpectrum Cepstrum_to_Spectrum2 (Cepstrum me) { //TODO power cepstrum
	try {
		// With fewer than 2 samples the FFT length 2 * nx - 2 would be zero or negative
		// and the buffer accesses below would be out of bounds.
		if (my nx < 2) {
			Melder_throw (U"A Cepstrum with fewer than 2 samples cannot be Fourier-transformed.");
		}
		autoNUMfft_Table fftTable;
		long numberOfSamples = 2 * my nx - 2;
		autoNUMvector<double> fftbuf (1, numberOfSamples);
		autoSpectrum thee = Spectrum_create (0.5 / my dx, my nx);
		// The first sample is not doubled; all later samples are.
		fftbuf[1] = sqrt (my z[1][1]);
		for (long i = 2; i <= my nx; i++) {
			fftbuf[i] = 2.0 * sqrt (my z[1][i]);
		}
		// fftbuf[my nx+1 ... numberOfSamples] = 0
		NUMfft_Table_init (&fftTable, numberOfSamples);
		NUMfft_forward (&fftTable, fftbuf.peek());
		// Unpack the transform: element 1 and element numberOfSamples are treated as
		// purely real bins; bin i in between is read as the pair (2i-2, 2i-1).
		thy z[1][1] = fabs (fftbuf[1]);
		for (long i = 2; i < my nx; i++) {
			double br = fftbuf[i + i - 2], bi = fftbuf[i + i - 1];
			thy z[1][i] = sqrt (br * br + bi * bi);
		}
		thy z[1][my nx] = fabs (fftbuf[numberOfSamples]);
		// Convert every magnitude and clear the second row of the Spectrum.
		for (long i = 1; i <= my nx; i++) {
			thy z[1][i] = exp (NUMln10 * thy z[1][i] / 20.0) * 2e-5 / sqrt (2 * thy dx);
			thy z[2][i] = 0.0;
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no Spectrum created.");
	}
}
/*
 * Convert a Spectrum into a power Cepstrum with the same number of samples (my nx).
 *
 * Steps, as performed below:
 *   1. Require that the Spectrum starts at 0 Hz (my x1 == 0) and has at least 2 samples.
 *   2. Fill a packed buffer of 2 * nx - 2 reals: the value returned by
 *      my v_getValueAtSample (i, 0, 2) goes into the "real" slot of bin i and the
 *      "imaginary" slot is set to 0.
 *      NOTE(review): unit 2 is presumably the dB value of the bin -- confirm against
 *      the Spectrum implementation of v_getValueAtSample.
 *   3. Backward-FFT the buffer, scale by 1 / numberOfSamples, and store the square of
 *      each of the first nx results in row 1 of the Cepstrum.
 *
 * Throws (via Melder_throw) on an invalid Spectrum or if any called routine fails;
 * the message is then completed with ": not converted to Cepstrum."
 */
static autoCepstrum Spectrum_to_Cepstrum2 (Spectrum me) {
	try {
		autoNUMfft_Table fftTable;
		// originalNumberOfSamplesProbablyOdd irrelevant
		if (my x1 != 0.0) {
			Melder_throw (U"A Fourier-transformable Spectrum must have a first frequency of 0 Hz, not ", my x1, U" Hz.");
		}
		// With fewer than 2 samples the FFT length 2 * nx - 2 would be zero or negative:
		// the buffer accesses below would be out of bounds and thy dx would divide by zero.
		if (my nx < 2) {
			Melder_throw (U"A Spectrum with fewer than 2 samples cannot be Fourier-transformed.");
		}
		long numberOfSamples = 2 * my nx - 2;
		autoCepstrum thee = Cepstrum_create (0.5 / my dx, my nx);
		// my dx = 1 / (dT * N) = 1 / (duration of sound)
		thy dx = 1 / (my dx * numberOfSamples); // Cepstrum is on [-T/2, T/2] !
		NUMfft_Table_init (&fftTable, numberOfSamples);
		autoNUMvector<double> fftbuf (1, numberOfSamples);
		// Packed layout: element 1 holds the first bin, element numberOfSamples the last;
		// bin i in between occupies the pair (2i-2, 2i-1) with a zero second component.
		fftbuf[1] = my v_getValueAtSample (1, 0, 2);
		for (long i = 2; i < my nx; i++) {
			fftbuf [i + i - 2] = my v_getValueAtSample (i, 0, 2);
			fftbuf [i + i - 1] = 0.0;
		}
		fftbuf [numberOfSamples] = my v_getValueAtSample (my nx, 0, 2);
		NUMfft_backward (&fftTable, fftbuf.peek());
		for (long i = 1; i <= my nx; i++) {
			double val = fftbuf[i] / numberOfSamples; // scaling 1/n because ifft(fft(1))= n;
			thy z[1][i] = val * val; // power cepstrum
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": not converted to Cepstrum.");
	}
}
/*
 * Compute a PowerCepstrogram from a Sound, frame by frame.
 *
 * Parameters:
 *   me            the Sound to analyse (not modified; a copy or a resampled version is used).
 *   minimumPitch  the analysis window is 3 / minimumPitch seconds long, limited to the
 *                 duration my dx * my nx of the sound.
 *   dt            time step between successive analysis frames.
 *
 * Processing, as performed below:
 *   - if the sampling frequency exceeds 30000 Hz the sound is resampled to half that rate;
 *   - pre-emphasis with coefficient 0.9 is applied in place on the working copy;
 *   - every frame is Hamming-windowed, zero-padded to a power-of-two length nfft (>= 8),
 *     forward-transformed, converted to a power spectrum by complexfftoutput_to_power,
 *     mean-subtracted, and backward-transformed; the squares of the first nfft/2 + 1
 *     results form one column of the cepstrogram.
 *
 * Returns a new PowerCepstrogram whose ownership is transferred to the caller.
 * Throws (via Melder_throw) if the analysis window contains fewer than 8 samples or if any
 * called routine fails; the message is then completed with ": no Cepstrogram created."
 */
PowerCepstrogram Sound_to_PowerCepstrogram_hillenbrand (Sound me, double minimumPitch, double dt) {
	try {
		// minimum analysis window has 3 periods of lowest pitch
		double analysisWidth = 3 / minimumPitch;
		if (analysisWidth > my dx * my nx) {
			analysisWidth = my dx * my nx;
		}
		double t1, samplingFrequency = 1 / my dx;
		autoSound thee;
		if (samplingFrequency > 30000) {
			samplingFrequency = samplingFrequency / 2;
			thee.reset (Sound_resample (me, samplingFrequency, 1));
		} else {
			thee.reset (Data_copy (me));
		}
		// pre-emphasis with fixed coefficient 0.9
		// (runs from the last sample backwards so each step still sees the unmodified predecessor)
		for (long i = thy nx; i > 1; i--) {
			thy z[1][i] -= 0.9 * thy z[1][i - 1];
		}
		long nosInWindow = analysisWidth * samplingFrequency, nFrames;
		if (nosInWindow < 8) {
			Melder_throw ("Analysis window too short.");
		}
		Sampled_shortTermAnalysis (thee.peek(), analysisWidth, dt, & nFrames, & t1);
		autoNUMvector<double> hamming (1, nosInWindow);
		for (long i = 1; i <= nosInWindow; i++) {
			hamming[i] = 0.54 -0.46 * cos(2 * NUMpi * (i - 1) / (nosInWindow - 1));
		}
		long nfft = 8; // minimum possible
		while (nfft < nosInWindow) { nfft *= 2; }
		long nfftdiv2 = nfft / 2;
		autoNUMvector<double> fftbuf (1, nfft); // "complex" array
		autoNUMvector<double> spectrum (1, nfftdiv2 + 1); // +1 needed
		autoNUMfft_Table fftTable;
		NUMfft_Table_init (&fftTable, nfft);
		// sound to spectrum
		// The nfftdiv2 + 1 quefrency bins start at 0 and are spaced 1 / samplingFrequency
		// apart, so that the last bin lies exactly at qmax = nfftdiv2 / samplingFrequency.
		double qmax = 0.5 * nfft / samplingFrequency, dq = 1 / samplingFrequency;
		autoPowerCepstrogram him = PowerCepstrogram_create (my xmin, my xmax, nFrames, dt, t1, 0, qmax, nfftdiv2+1, dq, 0);

		autoMelderProgress progress (L"Cepstrogram analysis");

		for (long iframe = 1; iframe <= nFrames; iframe++) {
			// Locate the first sample of the window, keeping it inside the sound.
			double tbegin = t1 + (iframe - 1) * dt - analysisWidth / 2;
			tbegin = tbegin < thy xmin ? thy xmin : tbegin;
			long istart = Sampled_xToIndex (thee.peek(), tbegin);
			istart = istart < 1 ? 1 : istart;
			long iend = istart + nosInWindow - 1;
			iend = iend > thy nx ? thy nx : iend;
			// Copy only the samples that actually exist (istart .. iend); if the window
			// would run past the end of the sound, the missing part is zero-padded
			// instead of being read from beyond the sample array.
			long numberOfAvailableSamples = iend - istart + 1;
			if (numberOfAvailableSamples < 0) {
				numberOfAvailableSamples = 0;
			}
			for (long i = 1; i <= numberOfAvailableSamples; i++) {
				fftbuf[i] = thy z[1][istart + i - 1] * hamming[i];
			}
			for (long i = numberOfAvailableSamples + 1; i <= nfft; i++) {
				fftbuf[i] = 0;
			}
			NUMfft_forward (&fftTable, fftbuf.peek());
			complexfftoutput_to_power (fftbuf.peek(), nfft, spectrum.peek(), true); // log10(|fft|^2)
			// subtract average
			double specmean = spectrum[1];
			for (long i = 2; i <= nfftdiv2 + 1; i++) {
				specmean += spectrum[i];
			}
			specmean /= nfftdiv2 + 1;
			for (long i = 1; i <= nfftdiv2 + 1; i++) {
				spectrum[i] -= specmean;
			}
			/*
			 * Here we diverge from Hillenbrand as he takes the fft of half of the spectral values.
			 * H. forgets that the actual spectrum has nfft/2+1 values. Therefore, we take the inverse
			 * transform because this keeps the number of samples a power of 2.
			 * At the same time this results in twice as much numbers in the quefrency domain, i.e. we end with nfft/2+1
			 * numbers while H. has only nfft/4!
			 */
			// Repack the real spectrum into the packed "complex" layout with zero imaginary parts.
			fftbuf[1] = spectrum[1];
			for (long i = 2; i < nfftdiv2 + 1; i++) {
				fftbuf[i+i-2] = spectrum[i];
				fftbuf[i+i-1] = 0;
			}
			fftbuf[nfft] = spectrum[nfftdiv2 + 1];
			NUMfft_backward (&fftTable, fftbuf.peek());
			// Power cepstrum: square of each of the first nfft/2 + 1 values of the inverse transform.
			for (long i = 1; i <= nfftdiv2 + 1; i++) {
				his z[i][iframe] = fftbuf[i] * fftbuf[i];
			}
			// Report progress once every 10 frames.
			if ((iframe % 10) == 1) {
				Melder_progress ((double) iframe / nFrames, L"Cepstrogram analysis of frame ",
					Melder_integer (iframe), L" out of ", Melder_integer (nFrames), L".");
			}
		}
		return him.transfer();
	} catch (MelderError) {
		Melder_throw (me, ": no Cepstrogram created.");
	}
}