/*
 * Builds a Spectrum with `my nx` bins from a Cepstrum.
 *
 * The square roots of the cepstral values (doubled for every bin except the first)
 * are put into a real buffer of 2 * nx - 2 samples and forward-transformed.
 * The magnitudes of the transform are then mapped through
 * exp (ln10 * magnitude / 20) * 2e-5 / sqrt (2 * thy dx) and stored as the real part;
 * the imaginary part of every bin is set to zero.
 *
 * Any MelderError is rethrown with the text ": no Spectrum created.".
 */
static autoSpectrum Cepstrum_to_Spectrum2 (Cepstrum me) {   // TODO power cepstrum
	try {
		autoNUMfft_Table fftTable;
		const long numberOfBins = my nx;
		const long numberOfSamples = 2 * numberOfBins - 2;
		autoNUMvector<double> fftbuf (1, numberOfSamples);
		autoSpectrum thee = Spectrum_create (0.5 / my dx, numberOfBins);

		/*
		 * Fill the first nx samples; the samples nx+1 .. numberOfSamples are left untouched
		 * (the original comment states that they are 0).
		 */
		const double *cepstralValues = my z [1];
		fftbuf [1] = sqrt (cepstralValues [1]);
		for (long ibin = 2; ibin <= numberOfBins; ibin ++) {
			fftbuf [ibin] = 2.0 * sqrt (cepstralValues [ibin]);
		}

		NUMfft_Table_init (& fftTable, numberOfSamples);
		NUMfft_forward (& fftTable, fftbuf.peek());

		/*
		 * Unpack the transform: sample 1 and the last sample are single real values,
		 * the samples in between are (real, imaginary) pairs.
		 */
		double *realPart = thy z [1];
		double *imaginaryPart = thy z [2];
		realPart [1] = fabs (fftbuf [1]);
		for (long isamp = 2; isamp < numberOfSamples; isamp += 2) {
			const double br = fftbuf [isamp];
			const double bi = fftbuf [isamp + 1];
			realPart [isamp / 2 + 1] = sqrt (br * br + bi * bi);
		}
		realPart [numberOfBins] = fabs (fftbuf [numberOfSamples]);

		/*
		 * Convert every magnitude and clear the imaginary part.
		 */
		const double rootOfTwiceBinWidth = sqrt (2 * thy dx);
		for (long ibin = 1; ibin <= numberOfBins; ibin ++) {
			const double magnitude = realPart [ibin];
			realPart [ibin] = exp (NUMln10 * magnitude / 20.0) * 2e-5 / rootOfTwiceBinWidth;
			imaginaryPart [ibin] = 0.0;
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no Spectrum created.");
	}
}
/*
 * Computes a power Cepstrum from a Spectrum by forward-transforming the `my nx`
 * values that `v_getValueAtSample (i, 0, 2)` returns for each bin, scaling the
 * result and squaring it.
 *
 * The Spectrum must start at 0 Hz (my x1 == 0.0); otherwise an error is thrown.
 * Any MelderError is rethrown with the text ": not converted to Cepstrum.".
 *
 * Ownership of the result is handed to the caller via transfer().
 */
Cepstrum Spectrum_to_Cepstrum_hillenbrand (Spectrum me) {
	try {
		autoNUMfft_Table fftTable;
		// originalNumberOfSamplesProbablyOdd irrelevant
		if (my x1 != 0.0) {
			Melder_throw ("A Fourier-transformable Spectrum must have a first frequency of 0 Hz, not ", my x1, L" Hz.");
		}
		// NOTE(review): the transform below has length my nx, but the scaling divides by my nx - 1;
		// this is kept as it was -- confirm that it is intended.
		long numberOfSamples = my nx - 1;
		autoCepstrum thee = Cepstrum_create (0.5 / my dx, my nx);
		NUMfft_Table_init (& fftTable, my nx);
		autoNUMvector<double> amp (1, my nx);
		for (long i = 1; i <= my nx; i ++) {
			amp [i] = my v_getValueAtSample (i, 0, 2);
		}
		NUMfft_forward (& fftTable, amp.peek());
		for (long i = 1; i <= my nx; i ++) {
			double val = amp [i] / numberOfSamples;   // scaling 1/n because ifft(fft(1))= n;
			thy z [1] [i] = val * val;   // power cepstrum
		}
		return thee.transfer();
	} catch (MelderError) {
		// The result of this function is a Cepstrum, so the message must say so.
		Melder_throw (me, ": not converted to Cepstrum.");
	}
}
/*
 * Computes a power Cepstrum from a Spectrum by means of a backward FFT.
 *
 * The values `v_getValueAtSample (i, 0, 2)` of the `my nx` bins are laid out in a
 * buffer of 2 * nx - 2 samples as purely real components (all imaginary slots are zero),
 * transformed backward, divided by the number of samples, and squared.
 *
 * The Spectrum must start at 0 Hz (my x1 == 0.0); otherwise an error is thrown.
 * Any MelderError is rethrown with the text ": not converted to Cepstrum.".
 */
static autoCepstrum Spectrum_to_Cepstrum2 (Spectrum me) {
	try {
		autoNUMfft_Table fftTable;
		// originalNumberOfSamplesProbablyOdd irrelevant
		if (my x1 != 0.0) {
			Melder_throw (U"A Fourier-transformable Spectrum must have a first frequency of 0 Hz, not ", my x1, U" Hz.");
		}
		const long numberOfBins = my nx;
		const long numberOfSamples = 2 * numberOfBins - 2;
		autoCepstrum thee = Cepstrum_create (0.5 / my dx, numberOfBins);
		/*
		 * my dx = 1 / (dT * N) = 1 / (duration of sound);
		 * the Cepstrum is on [-T/2, T/2] !
		 */
		thy dx = 1 / (my dx * numberOfSamples);
		NUMfft_Table_init (& fftTable, numberOfSamples);
		autoNUMvector<double> fftbuf (1, numberOfSamples);

		/*
		 * Sample 1 takes the first bin, the last sample takes the last bin;
		 * every bin in between fills a (value, 0.0) pair.
		 */
		fftbuf [1] = my v_getValueAtSample (1, 0, 2);
		for (long ibin = 2; ibin < numberOfBins; ibin ++) {
			const long realSlot = 2 * ibin - 2;
			fftbuf [realSlot] = my v_getValueAtSample (ibin, 0, 2);
			fftbuf [realSlot + 1] = 0.0;
		}
		fftbuf [numberOfSamples] = my v_getValueAtSample (numberOfBins, 0, 2);

		NUMfft_backward (& fftTable, fftbuf.peek());

		double *powerCepstrum = thy z [1];
		for (long ibin = 1; ibin <= numberOfBins; ibin ++) {
			const double scaled = fftbuf [ibin] / numberOfSamples;   // scaling 1/n because ifft(fft(1))= n
			powerCepstrum [ibin] = scaled * scaled;
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": not converted to Cepstrum.");
	}
}
/*
 * Forward real FFT of data [1..n], in place.
 *
 * After the transform (NUMfft_forward), and only if n > 1, the last element is moved
 * to position 2 and the elements 2 .. n-1 shift up by one place;
 * the original code describes this as being "compatible with old behaviour".
 */
void NUMforwardRealFastFourierTransform (double *data, long n) {
	autoNUMfft_Table table;
	NUMfft_Table_init (& table, n);
	NUMfft_forward (& table, data);
	if (n <= 1) {
		return;   // nothing to reorder
	}
	const double last = data [n];
	long i = n;
	while (i > 2) {
		data [i] = data [i - 1];
		i --;
	}
	data [2] = last;
}
void NUMreverseRealFastFourierTransform (double *data, long n) { autoNUMfft_Table table; if (n > 1) { // To be compatible with old behaviour double tmp = data[2]; for (long i = 2; i < n; i++) { data[i] = data[i + 1]; } data[n] = tmp; } NUMfft_Table_init (& table, n); NUMfft_backward (& table, data); }
/*
 * Computes the complex Spectrum of a Sound with a real FFT.
 *
 * me:   the sound; all of its channels are averaged into one signal before the transform.
 * fast: if non-zero, the signal is zero-padded to the smallest power of two
 *       that is at least 2 and at least my nx; otherwise exactly my nx samples are transformed.
 *
 * The result has numberOfSamples / 2 + 1 frequency bins; the real parts are in thy z [1],
 * the imaginary parts in thy z [2], both multiplied by my dx.
 * Any MelderError is rethrown with the text ": not converted to Spectrum.".
 */
autoSpectrum Sound_to_Spectrum (Sound me, int fast) {
	try {
		long numberOfSamples = my nx;
		if (fast) {
			numberOfSamples = 2;
			while (numberOfSamples < my nx) numberOfSamples *= 2;
		}
		long numberOfFrequencies = numberOfSamples / 2 + 1;   // 4 samples -> cos0 cos1 sin1 cos2; 5 samples -> cos0 cos1 sin1 cos2 sin2
		autoNUMvector <double> data (1, numberOfSamples);
		autoNUMfft_Table fourierTable;
		NUMfft_Table_init (& fourierTable, numberOfSamples);

		/*
		 * Mix all channels down to their average.
		 * (The previous code used 0.5 * (channel 1 + channel 2) for every multi-channel sound,
		 * which is only correct for exactly two channels.)
		 */
		const long numberOfChannels = my ny;
		if (numberOfChannels == 1) {
			for (long i = 1; i <= my nx; i ++)
				data [i] = my z [1] [i];
		} else {
			for (long i = 1; i <= my nx; i ++) {
				double sum = my z [1] [i];
				for (long ichan = 2; ichan <= numberOfChannels; ichan ++)
					sum += my z [ichan] [i];
				data [i] = sum / numberOfChannels;
			}
		}
		/*
		 * Make the zero padding explicit instead of depending on how the buffer was allocated.
		 */
		for (long i = my nx + 1; i <= numberOfSamples; i ++)
			data [i] = 0.0;

		NUMfft_forward (& fourierTable, data.peek());
		autoSpectrum thee = Spectrum_create (0.5 / my dx, numberOfFrequencies);
		thy dx = 1.0 / (my dx * numberOfSamples);   // override
		double *re = thy z [1];
		double *im = thy z [2];
		double scaling = my dx;
		re [1] = data [1] * scaling;
		im [1] = 0.0;
		for (long i = 2; i < numberOfFrequencies; i ++) {
			re [i] = data [i + i - 2] * scaling;   // data [2], data [4], ...
			im [i] = data [i + i - 1] * scaling;   // data [3], data [5], ...
		}
		if ((numberOfSamples & 1) != 0) {
			/*
			 * Odd number of samples: the last bin is a full (real, imaginary) pair.
			 * With a single sample there is only bin 1, which has been set above.
			 */
			if (numberOfSamples > 1) {
				re [numberOfFrequencies] = data [numberOfSamples - 1] * scaling;
				im [numberOfFrequencies] = data [numberOfSamples] * scaling;
			}
		} else {
			/*
			 * Even number of samples: the last bin has a real part only.
			 */
			re [numberOfFrequencies] = data [numberOfSamples] * scaling;
			im [numberOfFrequencies] = 0.0;
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": not converted to Spectrum.");
	}
}
autoSpectrogram Sound_to_Spectrogram (Sound me, double effectiveAnalysisWidth, double fmax, double minimumTimeStep1, double minimumFreqStep1, enum kSound_to_Spectrogram_windowShape windowType, double maximumTimeOversampling, double maximumFreqOversampling) { try { double nyquist = 0.5 / my dx; double physicalAnalysisWidth = windowType == kSound_to_Spectrogram_windowShape_GAUSSIAN ? 2 * effectiveAnalysisWidth : effectiveAnalysisWidth; double effectiveTimeWidth = effectiveAnalysisWidth / sqrt (NUMpi); double effectiveFreqWidth = 1 / effectiveTimeWidth; double minimumTimeStep2 = effectiveTimeWidth / maximumTimeOversampling; double minimumFreqStep2 = effectiveFreqWidth / maximumFreqOversampling; double timeStep = minimumTimeStep1 > minimumTimeStep2 ? minimumTimeStep1 : minimumTimeStep2; double freqStep = minimumFreqStep1 > minimumFreqStep2 ? minimumFreqStep1 : minimumFreqStep2; double duration = my dx * (double) my nx, windowssq = 0.0; /* * Compute the time sampling. */ long nsamp_window = (long) floor (physicalAnalysisWidth / my dx); long halfnsamp_window = nsamp_window / 2 - 1; nsamp_window = halfnsamp_window * 2; if (nsamp_window < 1) Melder_throw (U"Your analysis window is too short: less than two samples."); if (physicalAnalysisWidth > duration) Melder_throw (U"Your sound is too short:\n" U"it should be at least as long as ", windowType == kSound_to_Spectrogram_windowShape_GAUSSIAN ? U"two window lengths." : U"one window length."); long numberOfTimes = 1 + (long) floor ((duration - physicalAnalysisWidth) / timeStep); // >= 1 double t1 = my x1 + 0.5 * ((double) (my nx - 1) * my dx - (double) (numberOfTimes - 1) * timeStep); /* Centre of first frame. */ /* * Compute the frequency sampling of the FFT spectrum. 
*/ if (fmax <= 0.0 || fmax > nyquist) fmax = nyquist; long numberOfFreqs = (long) floor (fmax / freqStep); if (numberOfFreqs < 1) return autoSpectrogram (); long nsampFFT = 1; while (nsampFFT < nsamp_window || nsampFFT < 2 * numberOfFreqs * (nyquist / fmax)) nsampFFT *= 2; long half_nsampFFT = nsampFFT / 2; /* * Compute the frequency sampling of the spectrogram. */ long binWidth_samples = (long) floor (freqStep * my dx * nsampFFT); if (binWidth_samples < 1) binWidth_samples = 1; double binWidth_hertz = 1.0 / (my dx * nsampFFT); freqStep = binWidth_samples * binWidth_hertz; numberOfFreqs = (long) floor (fmax / freqStep); if (numberOfFreqs < 1) return autoSpectrogram (); autoSpectrogram thee = Spectrogram_create (my xmin, my xmax, numberOfTimes, timeStep, t1, 0.0, fmax, numberOfFreqs, freqStep, 0.5 * (freqStep - binWidth_hertz)); autoNUMvector <double> frame (1, nsampFFT); autoNUMvector <double> spec (1, nsampFFT); autoNUMvector <double> window (1, nsamp_window); autoNUMfft_Table fftTable; NUMfft_Table_init (& fftTable, nsampFFT); autoMelderProgress progress (U"Sound to Spectrogram..."); for (long i = 1; i <= nsamp_window; i ++) { double nSamplesPerWindow_f = physicalAnalysisWidth / my dx; double phase = (double) i / nSamplesPerWindow_f; // 0 .. 1 double value; switch (windowType) { case kSound_to_Spectrogram_windowShape_SQUARE: value = 1.0; break; case kSound_to_Spectrogram_windowShape_HAMMING: value = 0.54 - 0.46 * cos (2.0 * NUMpi * phase); break; case kSound_to_Spectrogram_windowShape_BARTLETT: value = 1.0 - fabs ((2.0 * phase - 1.0)); break; case kSound_to_Spectrogram_windowShape_WELCH: value = 1.0 - (2.0 * phase - 1.0) * (2.0 * phase - 1.0); break; case kSound_to_Spectrogram_windowShape_HANNING: value = 0.5 * (1.0 - cos (2.0 * NUMpi * phase)); break; case kSound_to_Spectrogram_windowShape_GAUSSIAN: { double imid = 0.5 * (double) (nsamp_window + 1), edge = exp (-12.0); phase = ((double) i - imid) / nSamplesPerWindow_f; /* -0.5 .. 
+0.5 */ value = (exp (-48.0 * phase * phase) - edge) / (1.0 - edge); break; } break; default: value = 1.0; } window [i] = (float) value; windowssq += value * value; } double oneByBinWidth = 1.0 / windowssq / binWidth_samples; for (long iframe = 1; iframe <= numberOfTimes; iframe ++) { double t = Sampled_indexToX (thee.peek(), iframe); long leftSample = Sampled_xToLowIndex (me, t), rightSample = leftSample + 1; long startSample = rightSample - halfnsamp_window; long endSample = leftSample + halfnsamp_window; Melder_assert (startSample >= 1); Melder_assert (endSample <= my nx); for (long i = 1; i <= half_nsampFFT; i ++) { spec [i] = 0.0; } for (long channel = 1; channel <= my ny; channel ++) { for (long j = 1, i = startSample; j <= nsamp_window; j ++) { frame [j] = my z [channel] [i ++] * window [j]; } for (long j = nsamp_window + 1; j <= nsampFFT; j ++) frame [j] = 0.0f; Melder_progress (iframe / (numberOfTimes + 1.0), U"Sound to Spectrogram: analysis of frame ", iframe, U" out of ", numberOfTimes); /* Compute Fast Fourier Transform of the frame. */ NUMfft_forward (& fftTable, frame.peek()); // complex spectrum /* Put power spectrum in frame [1..half_nsampFFT + 1]. */ spec [1] += frame [1] * frame [1]; // DC component for (long i = 2; i <= half_nsampFFT; i ++) spec [i] += frame [i + i - 2] * frame [i + i - 2] + frame [i + i - 1] * frame [i + i - 1]; spec [half_nsampFFT + 1] += frame [nsampFFT] * frame [nsampFFT]; // Nyquist frequency. Correct?? } if (my ny > 1 ) for (long i = 1; i <= half_nsampFFT; i ++) { spec [i] /= my ny; } /* Bin into frame [1..nBands]. */ for (long iband = 1; iband <= numberOfFreqs; iband ++) { long leftsample = (iband - 1) * binWidth_samples + 1, rightsample = leftsample + binWidth_samples; float power = 0.0f; for (long i = leftsample; i < rightsample; i ++) power += spec [i]; thy z [iband] [iframe] = power * oneByBinWidth; } } return thee; } catch (MelderError) { Melder_throw (me, U": spectrogram analysis not performed."); } }
/*
 * Computes a PowerCepstrogram from a Sound, frame by frame.
 *
 * minimumPitch: determines the analysis window, which is 3 / minimumPitch seconds long
 *               (but never longer than the sound).
 * dt:           the time step between frames.
 *
 * The sound is first copied (or resampled to half its sampling frequency if that frequency
 * is above 30000 Hz), and channel 1 of the copy is pre-emphasized with coefficient 0.9.
 * Every frame is Hamming-windowed, zero-padded to a power of two, converted to a power spectrum
 * by complexfftoutput_to_power, mean-subtracted, and transformed backward;
 * the squared result forms one column of the cepstrogram.
 *
 * Ownership of the result is handed to the caller via transfer().
 * Throws if the window has fewer than 8 samples; any MelderError is rethrown
 * with the text ": no Cepstrogram created.".
 */
PowerCepstrogram Sound_to_PowerCepstrogram_hillenbrand (Sound me, double minimumPitch, double dt) {
	try {
		// minimum analysis window has 3 periods of lowest pitch
		double analysisWidth = 3 / minimumPitch;
		if (analysisWidth > my dx * my nx) {
			analysisWidth = my dx * my nx;
		}
		double t1, samplingFrequency = 1 / my dx;
		autoSound thee;
		if (samplingFrequency > 30000) {
			samplingFrequency = samplingFrequency / 2;
			thee.reset (Sound_resample (me, samplingFrequency, 1));
		} else {
			thee.reset (Data_copy (me));
		}
		// pre-emphasis with fixed coefficient 0.9 (backwards, so that each sample still sees its unmodified predecessor)
		for (long i = thy nx; i > 1; i --) {
			thy z [1] [i] -= 0.9 * thy z [1] [i - 1];
		}
		long nosInWindow = analysisWidth * samplingFrequency, nFrames;
		if (nosInWindow < 8) {
			Melder_throw ("Analysis window too short.");
		}
		Sampled_shortTermAnalysis (thee.peek(), analysisWidth, dt, & nFrames, & t1);
		autoNUMvector<double> hamming (1, nosInWindow);
		for (long i = 1; i <= nosInWindow; i ++) {
			hamming [i] = 0.54 -0.46 * cos(2 * NUMpi * (i - 1) / (nosInWindow - 1));
		}
		long nfft = 8;   // minimum possible
		while (nfft < nosInWindow) {
			nfft *= 2;
		}
		long nfftdiv2 = nfft / 2;
		autoNUMvector<double> fftbuf (1, nfft);   // "complex" array
		autoNUMvector<double> spectrum (1, nfftdiv2 + 1);   // +1 needed
		autoNUMfft_Table fftTable;
		NUMfft_Table_init (& fftTable, nfft);   // sound to spectrum

		double qmax = 0.5 * nfft / samplingFrequency, dq = qmax / (nfftdiv2 + 1);
		autoPowerCepstrogram him = PowerCepstrogram_create (my xmin, my xmax, nFrames, dt, t1, 0, qmax, nfftdiv2+1, dq, 0);

		autoMelderProgress progress (L"Cepstrogram analysis");

		for (long iframe = 1; iframe <= nFrames; iframe ++) {
			double tbegin = t1 + (iframe - 1) * dt - analysisWidth / 2;
			tbegin = tbegin < thy xmin ? thy xmin : tbegin;
			long istart = Sampled_xToIndex (thee.peek(), tbegin);
			istart = istart < 1 ? 1 : istart;
			long iend = istart + nosInWindow - 1;
			iend = iend > thy nx ? thy nx : iend;
			/*
			 * Copy only the samples istart .. iend, which exist in the sound, and zero-pad the rest.
			 * (The clamped iend used to be ignored, so that a window that extended beyond
			 * the last sample was read from outside the sound.)
			 */
			long numberOfValidSamples = iend - istart + 1;
			if (numberOfValidSamples < 0) {
				numberOfValidSamples = 0;
			}
			for (long i = 1; i <= numberOfValidSamples; i ++) {
				fftbuf [i] = thy z [1] [istart + i - 1] * hamming [i];
			}
			for (long i = numberOfValidSamples + 1; i <= nfft; i ++) {
				fftbuf [i] = 0;
			}
			NUMfft_forward (& fftTable, fftbuf.peek());
			complexfftoutput_to_power (fftbuf.peek(), nfft, spectrum.peek(), true);   // log10(|fft|^2)
			// subtract average
			double specmean = spectrum [1];
			for (long i = 2; i <= nfftdiv2 + 1; i ++) {
				specmean += spectrum [i];
			}
			specmean /= nfftdiv2 + 1;
			for (long i = 1; i <= nfftdiv2 + 1; i ++) {
				spectrum [i] -= specmean;
			}
			/*
			 * Here we diverge from Hillenbrand as he takes the fft of half of the spectral values.
			 * H. forgets that the actual spectrum has nfft/2+1 values. Therefore, we take the inverse
			 * transform because this keeps the number of samples a power of 2.
			 * At the same time this results in twice as many numbers in the quefrency domain, i.e. we end with nfft/2+1
			 * numbers while H. has only nfft/4!
			 */
			fftbuf [1] = spectrum [1];
			for (long i = 2; i < nfftdiv2 + 1; i ++) {
				fftbuf [i+i-2] = spectrum [i];
				fftbuf [i+i-1] = 0;
			}
			fftbuf [nfft] = spectrum [nfftdiv2 + 1];
			NUMfft_backward (& fftTable, fftbuf.peek());
			for (long i = 1; i <= nfftdiv2 + 1; i ++) {
				his z [i] [iframe] = fftbuf [i] * fftbuf [i];
			}
			// report progress once every ten frames
			if ((iframe % 10) == 1) {
				Melder_progress ((double) iframe / nFrames, L"Cepstrogram analysis of frame ",
					Melder_integer (iframe), L" out of ", Melder_integer (nFrames), L".");
			}
		}
		return him.transfer();
	} catch (MelderError) {
		Melder_throw (me, ": no Cepstrogram created.");
	}
}
namespace {

/*
 * Owner of the heap buffers that Sound_to_Pitch_any works with.
 * The destructor frees whatever has been allocated so far,
 * so that every return path of the analysis releases its memory.
 * All buffers are zero-initialized (calloc).
 */
struct PitchAnalysisBuffers {
	double **frame;          // frame [1..numberOfChannels] [0..rowLength]
	long numberOfChannels;   // number of rows in 'frame'
	double *window;          // analysis window (autocorrelation methods only)
	double *windowR;         // normalized autocorrelation of the window (autocorrelation methods only)
	double *ac;              // autocorrelation work buffer (autocorrelation methods only)
	double *rBase;           // start of the allocation behind the symmetrical vector 'r'
	long *imax;              // lag index of every candidate
	double *localMean;       // local mean per channel

	PitchAnalysisBuffers () :
		frame (NULL), numberOfChannels (0), window (NULL), windowR (NULL),
		ac (NULL), rBase (NULL), imax (NULL), localMean (NULL) { }

	~PitchAnalysisBuffers () {
		if (frame) {
			for (long ichan = 1; ichan <= numberOfChannels; ichan ++)
				free (frame [ichan]);
			free (frame);
		}
		free (window);
		free (windowR);
		free (ac);
		free (rBase);
		free (imax);
		free (localMean);
	}

	/* Allocates frame [1..nchan], each row with the indices 0..rowLength. */
	void allocateFrames (long nchan, long rowLength) {
		frame = (double **) calloc ((size_t) (nchan + 1), sizeof (double *));
		numberOfChannels = nchan;
		for (long ichan = 1; ichan <= nchan; ichan ++)
			frame [ichan] = (double *) calloc ((size_t) (rowLength + 1), sizeof (double));
	}

private:
	PitchAnalysisBuffers (const PitchAnalysisBuffers &);   // not copyable
	PitchAnalysisBuffers & operator= (const PitchAnalysisBuffers &);   // not assignable
};

}   // namespace

/*
 * Pitch analysis of a Sound by autocorrelation or forward cross-correlation.
 *
 * dt:               time step; if <= 0.0, periodsPerWindow / minimumPitch / 4.0 is used.
 * minimumPitch:     lowest candidate frequency; determines the window length.
 * periodsPerWindow: window length in periods of minimumPitch (doubled internally for AC_GAUSS).
 * maxnCandidates:   maximum number of candidates per frame (at least 2);
 *                   raised to ceiling / minimumPitch if smaller.
 * method:           AC_HANNING, AC_GAUSS, FCC_NORMAL or FCC_ACCURATE.
 * silenceThreshold, voicingThreshold, octaveCost, octaveJumpCost, voicedUnvoicedCost:
 *                   settings of the candidate selection and of Pitch_pathFinder.
 * ceiling:          highest candidate frequency; limited to the Nyquist frequency.
 *
 * Returns the new Pitch, or NULL after writing a message to std::cout when the arguments
 * are invalid, the sound is too short, or a sample range falls outside the sound.
 *
 * NOTE(review): 'fftTable' is never released in this function, and 'thee' is not released
 * when NULL is returned from within the frame loop; the routines that destroy them
 * are not visible here, so this has been left as it was.
 */
Pitch Sound_to_Pitch_any (Sound me,
	double dt,                 /* time step */
	double minimumPitch,       /* pitch setting */
	double periodsPerWindow,   /* depends on the analysis method */
	int maxnCandidates, int method,
	double silenceThreshold, double voicingThreshold, double octaveCost,
	double octaveJumpCost, double voicedUnvoicedCost, double ceiling)
{
	double duration, t1;
	double dt_window;   /* Window length in seconds. */
	long nsamp_window, halfnsamp_window;   /* Number of samples per window. */
	long nFrames, maximumLag;
	long iframe, nsampFFT;
	double interpolation_depth;
	long nsamp_period, halfnsamp_period;   /* Number of samples in longest period. */
	long brent_ixmax, brent_depth;
	double globalPeak;

	/*
	 * The arguments are invalid if there are too few candidates OR if the method is out of range.
	 * (The range test used to combine its two halves with &&, which can never be true.)
	 */
	if (maxnCandidates < 2 || method < AC_HANNING || method > FCC_ACCURATE) {
		std::cout<<"Error: maxnCandidates: "<<maxnCandidates<<" method: "<<method<<"."<<std::endl;
		std::cout<<"Sound_to_Pitch.cpp: Line 13. 69"<<std::endl;
		return NULL;
	}

	if (maxnCandidates < ceiling / minimumPitch) maxnCandidates = ceiling / minimumPitch;

	if (dt <= 0.0) dt = periodsPerWindow / minimumPitch / 4.0;   /* e.g. 3 periods, 75 Hz: 10 milliseconds. */

	switch (method) {
		case AC_HANNING:
			brent_depth = NUM_PEAK_INTERPOLATE_SINC70;
			interpolation_depth = 0.5;
			break;
		case AC_GAUSS:
			periodsPerWindow *= 2;   /* Because Gaussian window is twice as long. */
			brent_depth = NUM_PEAK_INTERPOLATE_SINC700;
			interpolation_depth = 0.25;   /* Because Gaussian window is twice as long. */
			break;
		case FCC_NORMAL:
			brent_depth = NUM_PEAK_INTERPOLATE_SINC70;
			interpolation_depth = 1.0;
			break;
		case FCC_ACCURATE:
			brent_depth = NUM_PEAK_INTERPOLATE_SINC700;
			interpolation_depth = 1.0;
			break;
	}
	duration = my dx * my nx;
	if (minimumPitch < periodsPerWindow / duration) {
		std::cout<<"To analyse this Sound, minimum pitch must not be less than "<< periodsPerWindow / duration<<" Hz."<<std::endl;
		std::cout<<"Sound_to_Pitch.cpp: Line 31.103"<<std::endl;
		return NULL;
	}

	/*
	 * Determine the number of samples in the longest period.
	 * We need this to compute the local mean of the sound (looking one period in both directions),
	 * and to compute the local peak of the sound (looking half a period in both directions).
	 */
	nsamp_period = floor(1 / my dx / minimumPitch);
	halfnsamp_period = nsamp_period / 2 + 1;

	if (ceiling > 0.5 / my dx) ceiling = 0.5 / my dx;

	// Determine window length in seconds and in samples.
	dt_window = periodsPerWindow / minimumPitch;
	nsamp_window = floor (dt_window / my dx);
	halfnsamp_window = nsamp_window / 2 - 1;
	if (halfnsamp_window < 2){
		std::cout<<"Analysis window too short."<<std::endl;
		std::cout<<"Sound_to_Pitch.cpp: Line 31.123"<<std::endl;
		return NULL;
	}
	nsamp_window = halfnsamp_window * 2;

	// Determine the maximum lag.
	maximumLag = floor (nsamp_window / periodsPerWindow) + 2;
	if (maximumLag > nsamp_window) maximumLag = nsamp_window;

	/*
	 * Determine the number of frames.
	 * Fit as many frames as possible symmetrically in the total duration.
	 * We do this even for the forward cross-correlation method,
	 * because that allows us to compare the two methods.
	 */
	if(!Sampled_shortTermAnalysis (me, method >= FCC_NORMAL ? 1 / minimumPitch + dt_window : dt_window, dt, & nFrames, & t1)){
		std::cout<<"The pitch analysis would give zero pitch frames."<<std::endl;
		std::cout<<"Sound_to_Pitch.cpp: Line 31.142"<<std::endl;
		return NULL;
	}

	// Create the resulting pitch contour.
	Pitch thee = Pitch_create (my xmin, my xmax, nFrames, dt, t1, ceiling, maxnCandidates);

	// Compute the global absolute peak for determination of silence threshold.
	globalPeak = 0.0;
	for (long channel = 1; channel <= my ny; channel ++) {
		double mean = 0.0;
		for (long i = 1; i <= my nx; i ++) {
			mean += my z [channel] [i];
		}
		mean /= my nx;
		for (long i = 1; i <= my nx; i ++) {
			double value = fabs (my z [channel] [i] - mean);
			if (value > globalPeak) globalPeak = value;
		}
	}
	if (globalPeak == 0.0) return thee;

	/*
	 * The FFT table is created only here, after all the early returns above,
	 * so that those returns cannot leave it behind.
	 */
	NUMfft_Table fftTable = NUMfft_Table_create();

	/*
	 * All work buffers are owned by 'buffers' and are freed when this function returns.
	 */
	PitchAnalysisBuffers buffers;
	double **frame = NULL, *ac = NULL, *window = NULL, *windowR = NULL;

	if (method >= FCC_NORMAL) {   /* For cross-correlation analysis. */
		// Create buffer for cross-correlation analysis.
		buffers. allocateFrames (my ny, nsamp_window);
		frame = buffers. frame;

		brent_ixmax = nsamp_window * interpolation_depth;
	} else {   /* For autocorrelation analysis. */
		/*
		 * Compute the number of samples needed for doing FFT.
		 * To avoid edge effects, we have to append zeroes to the window.
		 * The maximum lag considered for maxima is maximumLag.
		 * The maximum lag used in interpolation is nsamp_window * interpolation_depth.
		 */
		nsampFFT = 1;
		while (nsampFFT < nsamp_window * (1 + interpolation_depth)) nsampFFT *= 2;

		// Create buffers for autocorrelation analysis.
		buffers. allocateFrames (my ny, nsampFFT);
		frame = buffers. frame;
		window = buffers. window = (double *) calloc ((size_t) (nsamp_window + 1), sizeof (double));
		windowR = buffers. windowR = (double *) calloc ((size_t) (nsampFFT + 1), sizeof (double));
		ac = buffers. ac = (double *) calloc ((size_t) (nsampFFT + 1), sizeof (double));

		NUMfft_Table_init (fftTable, nsampFFT);

		/*
		 * A Gaussian or Hanning window is applied against phase effects.
		 * The Hanning window is 2 to 5 dB better for 3 periods/window.
		 * The Gaussian window is 25 to 29 dB better for 6 periods/window.
		 */
		if (method == AC_GAUSS) {   /* Gaussian window. */
			double imid = 0.5 * (nsamp_window + 1), edge = exp (-12.0);
			for (long i = 1; i <= nsamp_window; i ++)
				window[i] = (exp(-48.0*(i-imid)*(i-imid) / (nsamp_window + 1) / (nsamp_window + 1)) - edge) / (1 - edge);
		} else {   /* Hanning window */
			for (long i = 1; i <= nsamp_window; i ++)
				window [i] = 0.5 - 0.5 * cos (i * 2 * NUMpi / (nsamp_window + 1));
		}

		// Compute the normalized autocorrelation of the window.
		for (long i = 1; i <= nsamp_window; i ++) windowR [i] = window [i];
		NUMfft_forward (fftTable, windowR);
		windowR [1] *= windowR [1];   // DC component
		for (long i = 2; i < nsampFFT; i += 2) {
			windowR [i] = windowR [i] * windowR [i] + windowR [i+1] * windowR [i+1];
			windowR [i + 1] = 0.0;   // power spectrum: square and zero
		}
		windowR [nsampFFT] *= windowR [nsampFFT];   // Nyquist frequency
		NUMfft_backward (fftTable, windowR);   // autocorrelation
		for (long i = 2; i <= nsamp_window; i ++) windowR [i] /= windowR [1];   // normalize
		windowR [1] = 1.0;   // normalize

		brent_ixmax = nsamp_window * interpolation_depth;
	}

	/*
	 * 'r' is a symmetrical vector with the indices -(nsamp_window + 1) .. +(nsamp_window + 1).
	 * It is zero-initialized, because the maximum search and the sinc interpolation
	 * can read lags that the correlation loops below have not written.
	 */
	buffers. rBase = (double *) calloc ((size_t) (2 * (nsamp_window + 1) + 1), sizeof (double));
	double *r = buffers. rBase + (nsamp_window + 1);
	long *imax = buffers. imax = (long *) calloc ((size_t) (maxnCandidates + 1), sizeof (long));
	double *localMean = buffers. localMean = (double *) calloc ((size_t) (my ny + 1), sizeof (double));

	for (iframe = 1; iframe <= nFrames; iframe ++) {
		Pitch_Frame pitchFrame = & thy frame[iframe];
		double t = thy x1 + (iframe - 1) *(thy dx), localPeak;
		long leftSample = (long) floor((t - my x1) / my dx) + 1;
		long rightSample = leftSample + 1;
		long startSample, endSample;

		for (long channel = 1; channel <= my ny; ++ channel) {
			// Compute the local mean; look one longest period to both sides.
			startSample = rightSample - nsamp_period;
			endSample = leftSample + nsamp_period;

			/*
			 * Samples are numbered from 1, so 0 is out of range as well.
			 * (This test used to be 'startSample < 0', in disagreement with its own message.)
			 */
			if (startSample < 1) {
				std::cout<<"StartSample < 1"<<std::endl;
				std::cout<<"Sound_to_Pitch.cpp: Line 31"<<std::endl;
				return NULL;
			}
			if (endSample > my nx){
				std::cout<<"EndSample > my nx"<<std::endl;
				std::cout<<"Sound_to_Pitch.cpp: Line 31.262"<<std::endl;
				return NULL;
			}

			localMean[channel] = 0.0;
			for (long i = startSample; i <= endSample; i ++) {
				localMean[channel] += my z[channel][i];
			}
			localMean[channel] /= 2 * nsamp_period;

			// Copy a window to a frame and subtract the local mean. We are going to kill the DC component before windowing.
			startSample = rightSample - halfnsamp_window;
			endSample = leftSample + halfnsamp_window;

			if (startSample < 1) {
				std::cout<<"StartSample < 1"<<std::endl;
				std::cout<<"Sound_to_Pitch.cpp: Line 31.281"<<std::endl;
				return NULL;
			}
			if (endSample > my nx){
				std::cout<<"EndSample > my nx"<<std::endl;
				std::cout<<"Sound_to_Pitch.cpp: Line 31.287"<<std::endl;
				return NULL;
			}

			if (method < FCC_NORMAL) {
				for (long j = 1, i = startSample; j <= nsamp_window; j ++)
					frame [channel] [j] = (my z [channel] [i ++] - localMean [channel]) * window [j];
				for (long j = nsamp_window + 1; j <= nsampFFT; j ++)
					frame [channel] [j] = 0.0;
			} else {
				for (long j = 1, i = startSample; j <= nsamp_window; j ++)
					frame [channel] [j] = my z [channel] [i ++] - localMean [channel];
			}
		}

		// Compute the local peak; look half a longest period to both sides.
		localPeak = 0.0;
		if ((startSample = halfnsamp_window + 1 - halfnsamp_period) < 1) startSample = 1;
		if ((endSample = halfnsamp_window + halfnsamp_period) > nsamp_window) endSample = nsamp_window;
		for (long channel = 1; channel <= my ny; channel ++) {
			for (long j = startSample; j <= endSample; j ++) {
				double value = fabs (frame [channel] [j]);
				if (value > localPeak) localPeak = value;
			}
		}
		pitchFrame->intensity = localPeak > globalPeak ? 1.0 : localPeak / globalPeak;

		// Compute the correlation into the array 'r'.
		if (method >= FCC_NORMAL) {
			double startTime = t - 0.5 * (1.0 / minimumPitch + dt_window);
			long localSpan = maximumLag + nsamp_window, localMaximumLag, offset;
			/*
			 * The low index of startTime is floor (...) + 1, just like leftSample above.
			 * (The '+ 1' used to be outside the assignment, so that startSample was one too low
			 * and 'offset' could become -1.)
			 */
			startSample = (long) floor ((startTime - my x1) / my dx) + 1;
			if (startSample < 1) startSample = 1;
			if (localSpan > my nx + 1 - startSample) localSpan = my nx + 1 - startSample;
			localMaximumLag = localSpan - nsamp_window;
			offset = startSample - 1;
			double sumx2 = 0;   /* Sum of squares. */
			for (long channel = 1; channel <= my ny; channel ++) {
				double *amp = my z[channel] + offset;
				for (long i = 1; i <= nsamp_window; i ++) {
					double x = amp[i] - localMean[channel];
					sumx2 += x * x;
				}
			}
			double sumy2 = sumx2;   /* At zero lag, these are still equal. */
			r[0] = 1.0;
			for (long i = 1; i <= localMaximumLag; i ++) {
				double product = 0.0;
				for (long channel = 1; channel <= my ny; channel ++) {
					double *amp = my z[channel] + offset;
					double y0 = amp[i] - localMean[channel];
					double yZ = amp[i + nsamp_window] - localMean[channel];
					sumy2 += yZ * yZ - y0 * y0;   // slide the second window by one sample
					for (long j = 1; j <= nsamp_window; j ++) {
						double x = amp[j] - localMean[channel];
						double y = amp[i + j] - localMean[channel];
						product += x * y;
					}
				}
				r[- i] = r[i] = product / sqrt (sumx2 * sumy2);
			}
		} else {
			// The FFT of the autocorrelation is the power spectrum.
			for (long i = 1; i <= nsampFFT; i ++)
				ac [i] = 0.0;
			for (long channel = 1; channel <= my ny; channel ++) {
				NUMfft_forward (fftTable, frame [channel]);   /* Complex spectrum. */
				ac [1] += frame [channel] [1] * frame [channel] [1];   /* DC component. */
				for (long i = 2; i < nsampFFT; i += 2) {
					ac [i] += frame [channel] [i] * frame [channel] [i] + frame [channel] [i+1] * frame [channel] [i+1];   /* Power spectrum. */
				}
				ac [nsampFFT] += frame [channel] [nsampFFT] * frame [channel] [nsampFFT];   /* Nyquist frequency. */
			}
			NUMfft_backward (fftTable, ac);   /* Autocorrelation. */

			/*
			 * Normalize the autocorrelation to the value with zero lag,
			 * and divide it by the normalized autocorrelation of the window.
			 */
			r [0] = 1.0;
			for (long i = 1; i <= brent_ixmax; i ++)
				r [- i] = r [i] = ac [i + 1] / (ac [1] * windowR [i + 1]);
		}

		// Create (too much) space for candidates.
		Pitch_Frame_init (pitchFrame, maxnCandidates);

		// Register the first candidate, which is always present: voicelessness.
		pitchFrame->nCandidates = 1;
		pitchFrame->candidate[1].frequency = 0.0;   /* Voiceless: always present. */
		pitchFrame->candidate[1].strength = 0.0;

		/*
		 * Shortcut: absolute silence is always voiceless.
		 * Go to next frame.
		 */
		if (localPeak == 0) continue;

		/*
		 * Find the strongest maxima of the correlation of this frame,
		 * and register them as candidates.
		 */
		imax[1] = 0;
		for (long i = 2; i < maximumLag && i < brent_ixmax; i ++)
			if (r[i] > 0.5 * voicingThreshold &&   /* Not too unvoiced? */
				r[i] > r[i-1] && r[i] >= r[i+1])   /* Maximum? */
		{
			int place = 0;

			// Use parabolic interpolation for first estimate of frequency,
			// and sin(x)/x interpolation to compute the strength of this frequency.
			double dr = 0.5 * (r[i+1] - r[i-1]);
			double d2r = 2 * r[i] - r[i-1] - r[i+1];
			double frequencyOfMaximum = 1 / my dx / (i + dr / d2r);
			long offset = - brent_ixmax - 1;
			double strengthOfMaximum = /* method & 1 ? */
				NUM_interpolate_sinc (& r[offset], brent_ixmax - offset, 1 / my dx / frequencyOfMaximum - offset, 30)
				/* : r [i] + 0.5 * dr * dr / d2r */;
			/* High values due to short windows are to be reflected around 1. */
			if (strengthOfMaximum > 1.0) strengthOfMaximum = 1.0 / strengthOfMaximum;

			// Find a place for this maximum.
			if (pitchFrame->nCandidates < thy maxnCandidates) {   /* Is there still a free place? */
				place = ++ pitchFrame->nCandidates;
			} else {
				/* Try the place of the weakest candidate so far. */
				double weakest = 2;
				for (int iweak = 2; iweak <= thy maxnCandidates; iweak ++) {
					/* High frequencies are to be favoured */
					/* if we want to analyze a perfectly periodic signal correctly. */
					double localStrength = pitchFrame->candidate[iweak].strength - octaveCost *
						NUMlog2 (minimumPitch / pitchFrame->candidate[iweak].frequency);
					if (localStrength < weakest) {
						weakest = localStrength;
						place = iweak;
					}
				}
				/* If this maximum is weaker than the weakest candidate so far, give it no place. */
				if (strengthOfMaximum - octaveCost * NUMlog2 (minimumPitch / frequencyOfMaximum) <= weakest)
					place = 0;
			}
			if (place) {   /* Have we found a place for this candidate? */
				pitchFrame->candidate[place].frequency = frequencyOfMaximum;
				pitchFrame->candidate[place].strength = strengthOfMaximum;
				imax [place] = i;
			}
		}

		// Second pass: for extra precision, maximize sin(x)/x interpolation ('sinc').
		for (long i = 2; i <= pitchFrame->nCandidates; i ++) {
			if (method != AC_HANNING || pitchFrame->candidate[i].frequency > 0.0 / my dx) {
				double xmid, ymid;
				long offset = - brent_ixmax - 1;
				ymid = NUMimproveMaximum (& r[offset], brent_ixmax - offset, imax[i] - offset,
					pitchFrame->candidate[i].frequency > 0.3 / my dx ? NUM_PEAK_INTERPOLATE_SINC700 : brent_depth, & xmid);
				xmid += offset;
				pitchFrame->candidate[i].frequency = 1.0 / my dx / xmid;
				if (ymid > 1.0) ymid = 1.0 / ymid;
				pitchFrame->candidate[i].strength = ymid;
			}
		}
	}   /* Next frame. */

	Pitch_pathFinder (thee, silenceThreshold, voicingThreshold, octaveCost, octaveJumpCost,
		voicedUnvoicedCost, ceiling, false);   // false: formant pulling is off (Melder_debug 31 in the original)

	return thee;
}
/*
 * Computes the complex Spectrum of a Sound with a real FFT.
 *
 * me:   the sound; multiple channels are averaged into one signal before the transform.
 * fast: if non-zero, the transform length is the smallest power of two
 *       that is at least 2 and at least my nx; otherwise it is exactly my nx.
 *
 * The result has numberOfSamples / 2 + 1 frequency bins; the real parts are in thy z [1],
 * the imaginary parts in thy z [2], both multiplied by my dx.
 * Any MelderError is rethrown with the text ": not converted to Spectrum.".
 */
autoSpectrum Sound_to_Spectrum (Sound me, int fast) {
	try {
		const long numberOfChannels = my ny;
		long numberOfSamples = my nx;
		if (fast) {
			for (numberOfSamples = 2; numberOfSamples < my nx; numberOfSamples *= 2) { }
		}
		const long numberOfFrequencies = numberOfSamples / 2 + 1;   // 4 samples -> cos0 cos1 sin1 cos2; 5 samples -> cos0 cos1 sin1 cos2 sin2
		autoNUMvector <double> data (1, numberOfSamples);

		/*
		 * Fill data [1..my nx] with the signal. The samples my nx + 1 .. numberOfSamples
		 * should be zero; according to the original comment they already are, so they are not touched.
		 */
		if (numberOfChannels == 1) {
			const double *samples = my z [1];
			for (long isamp = 1; isamp <= my nx; isamp ++) {
				data [isamp] = samples [isamp];
			}
		} else {
			/*
			 * Add up the channels on top of what the buffer already contains, then divide.
			 */
			for (long isamp = 1; isamp <= my nx; isamp ++) {
				double sum = data [isamp];
				for (long ichan = 1; ichan <= numberOfChannels; ichan ++) {
					sum += my z [ichan] [isamp];
				}
				data [isamp] = sum / numberOfChannels;
			}
		}

		autoNUMfft_Table fourierTable;
		NUMfft_Table_init (& fourierTable, numberOfSamples);
		NUMfft_forward (& fourierTable, data.peek());

		autoSpectrum thee = Spectrum_create (0.5 / my dx, numberOfFrequencies);
		thy dx = 1.0 / (my dx * numberOfSamples);   // override
		double *realPart = thy z [1];
		double *imaginaryPart = thy z [2];
		const double scaling = my dx;

		/*
		 * Bin 1 is real only; the following bins are (real, imaginary) pairs.
		 */
		realPart [1] = data [1] * scaling;
		imaginaryPart [1] = 0.0;
		for (long ifreq = 2; ifreq < numberOfFrequencies; ifreq ++) {
			const long realSlot = 2 * ifreq - 2;   // data [2], data [4], ...
			realPart [ifreq] = data [realSlot] * scaling;
			imaginaryPart [ifreq] = data [realSlot + 1] * scaling;   // data [3], data [5], ...
		}

		/*
		 * The last bin: real only for an even number of samples, a full pair for an odd number.
		 * With a single sample there is only bin 1, which has been set above.
		 */
		const bool oddNumberOfSamples = ( (numberOfSamples & 1) != 0 );
		if (! oddNumberOfSamples) {
			realPart [numberOfFrequencies] = data [numberOfSamples] * scaling;
			imaginaryPart [numberOfFrequencies] = 0.0;
		} else if (numberOfSamples > 1) {
			realPart [numberOfFrequencies] = data [numberOfSamples - 1] * scaling;
			imaginaryPart [numberOfFrequencies] = data [numberOfSamples] * scaling;
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": not converted to Spectrum.");
	}
}