/*
 * Short-term LPC analysis of a Sound.
 *
 * A copy of the sound is pre-emphasized (only when preEmphasisFrequency lies
 * below half the sampling frequency), then cut into frames of duration
 * 2 * analysisWidth. Each frame has its mean removed, is multiplied by a
 * Gaussian window and is handed to the estimator selected by 'method'
 * (LPC_METHOD_AUTO, LPC_METHOD_COVAR, LPC_METHOD_BURG or LPC_METHOD_MARPLE;
 * tol1 and tol2 are only passed on to the Marple estimator).
 *
 * Throws when the analysis window holds fewer than predictionOrder + 1 samples.
 * The result is handed out of its auto-wrapper through transfer().
 */
static LPC _Sound_to_LPC (Sound me, int predictionOrder, double analysisWidth, double dt,
    double preEmphasisFrequency, int method, double tol1, double tol2)
{
    double t1, samplingFrequency = 1.0 / my dx;
    double windowDuration = 2 * analysisWidth;   /* gaussian window */
    long nFrames, frameErrorCount = 0;

    // The message used to glue the numeric value directly onto "Please increase ...";
    // the value is now followed by a full stop and a line break.
    if (floor (windowDuration / my dx) < predictionOrder + 1)
        Melder_throw ("Analysis window duration too short.\n"
            "For a prediction order of ", predictionOrder,
            " the analysis window duration has to be greater than ", my dx * (predictionOrder + 1), ".\n"
            "Please increase the analysis window duration or lower the prediction order.");

    // Convenience: analyse the whole sound into one LPC_frame
    if (windowDuration > my dx * my nx) {
        windowDuration = my dx * my nx;
    }

    Sampled_shortTermAnalysis (me, windowDuration, dt, & nFrames, & t1);
    autoSound sound = Data_copy (me);
    autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
    autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
    autoLPC thee = LPC_create (my xmin, my xmax, nFrames, dt, t1, predictionOrder, my dx);

    autoMelderProgress progress (L"LPC analysis");

    // Pre-emphasis works on the private copy, never on the caller's sound.
    if (preEmphasisFrequency < samplingFrequency / 2) {
        Sound_preEmphasis (sound.peek(), preEmphasisFrequency);
    }

    for (long i = 1; i <= nFrames; i++) {
        LPC_Frame lpcframe = (LPC_Frame) & thy d_frames[i];
        double t = Sampled_indexToX (thee.peek(), i);
        LPC_Frame_init (lpcframe, predictionOrder);

        // Copy the frame centred on t, remove its mean and apply the window.
        Sound_into_Sound (sound.peek(), sframe.peek(), t - windowDuration / 2);
        Vector_subtractMean (sframe.peek());
        Sounds_multiply (sframe.peek(), window.peek());

        // An unrecognised method leaves the freshly initialised frame as it is.
        // NOTE(review): frameErrorCount is accumulated but never reported in this function.
        if (method == LPC_METHOD_AUTO) {
            if (! Sound_into_LPC_Frame_auto (sframe.peek(), lpcframe)) {
                frameErrorCount++;
            }
        } else if (method == LPC_METHOD_COVAR) {
            if (! Sound_into_LPC_Frame_covar (sframe.peek(), lpcframe)) {
                frameErrorCount++;
            }
        } else if (method == LPC_METHOD_BURG) {
            if (! Sound_into_LPC_Frame_burg (sframe.peek(), lpcframe)) {
                frameErrorCount++;
            }
        } else if (method == LPC_METHOD_MARPLE) {
            if (! Sound_into_LPC_Frame_marple (sframe.peek(), lpcframe, tol1, tol2)) {
                frameErrorCount++;
            }
        }

        // Refresh the progress display on frames 1, 11, 21, ...
        if ( (i % 10) == 1) {
            Melder_progress ( (double) i / nFrames, L"LPC analysis of frame ",
                Melder_integer (i), L" out of ", Melder_integer (nFrames), L".");
        }
    }
    return thee.transfer();
}
/*
 * Build a BarkSpectrogram from a Sound by short-term analysis.
 *
 * Non-positive f1_bark, fmax_bark and df_bark are replaced by 1,
 * NUMhertzToBark2 (nyquist) and 1 respectively, and fmax_bark is capped at
 * NUMhertzToBark2 (nyquist). Every frame (duration 2 * analysisWidth) is
 * multiplied by a Gaussian window before it is analysed.
 *
 * Throws when the rounded number of filters is not positive; any error is
 * re-thrown with ": no BarkSpectrogram created." appended.
 */
autoBarkSpectrogram Sound_to_BarkSpectrogram (Sound me, double analysisWidth, double dt,
    double f1_bark, double fmax_bark, double df_bark)
{
    try {
        double nyquist = 0.5 / my dx;
        double samplingFrequency = 2 * nyquist;
        double windowDuration = 2 * analysisWidth;   // gaussian window
        double barkCeiling = NUMhertzToBark2 (nyquist);
        double fmin_bark = 0;

        // Substitute defaults for unusable arguments.
        if (f1_bark <= 0) {
            f1_bark = 1;
        }
        if (fmax_bark <= 0) {
            fmax_bark = barkCeiling;
        }
        if (df_bark <= 0) {
            df_bark = 1;
        }
        fmax_bark = MIN (fmax_bark, barkCeiling);

        long numberOfFilters = lround ( (fmax_bark - f1_bark) / df_bark);
        if (numberOfFilters <= 0) {
            Melder_throw (U"The combination of filter parameters is not valid.");
        }

        long numberOfFrames;
        double firstFrameTime;
        Sampled_shortTermAnalysis (me, windowDuration, dt, & numberOfFrames, & firstFrameTime);

        autoSound analysisFrame = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoSound gaussianWindow = Sound_createGaussian (windowDuration, samplingFrequency);
        autoBarkSpectrogram thee = BarkSpectrogram_create (my xmin, my xmax, numberOfFrames, dt,
            firstFrameTime, fmin_bark, fmax_bark, numberOfFilters, df_bark, f1_bark);

        autoMelderProgress progress (U"BarkSpectrogram analysis");

        for (long iframe = 1; iframe <= numberOfFrames; iframe++) {
            double frameCentre = Sampled_indexToX (thee.get(), iframe);
            Sound_into_Sound (me, analysisFrame.get(), frameCentre - windowDuration / 2.0);
            Sounds_multiply (analysisFrame.get(), gaussianWindow.get());
            Sound_into_BarkSpectrogram_frame (analysisFrame.get(), thee.get(), iframe);

            // Progress is refreshed on frames 1, 11, 21, ...
            if (iframe % 10 == 1) {
                Melder_progress ( (double) iframe / numberOfFrames,
                    U"BarkSpectrogram analysis: frame ", iframe, U" from ", numberOfFrames, U".");
            }
        }

        _Spectrogram_windowCorrection ((Spectrogram) thee.get(), gaussianWindow -> nx);
        return thee;
    } catch (MelderError) {
        Melder_throw (me, U": no BarkSpectrogram created.");
    }
}
/*
 * Build a MelSpectrogram from a Sound by short-term analysis.
 *
 * Defaults: fmax_mel outside (0, fceiling] becomes fceiling; when fmax_mel does
 * not exceed f1_mel both are reset to fbottom and fceiling; a non-positive
 * f1_mel becomes fbottom and a non-positive df_mel becomes 100, where
 * fbottom = NUMhertzToMel2 (100.0) and fceiling = NUMhertzToMel2 (nyquist).
 * fmax_mel is then moved to f1_mel + numberOfFilters * df_mel.
 *
 * Throws when the rounded number of filters is not positive (the same guard
 * as in Sound_to_BarkSpectrogram); any error is re-thrown with
 * ": no MelSpectrogram created." appended.
 */
autoMelSpectrogram Sound_to_MelSpectrogram (Sound me, double analysisWidth, double dt,
    double f1_mel, double fmax_mel, double df_mel)
{
    try {
        double t1, samplingFrequency = 1.0 / my dx, nyquist = 0.5 * samplingFrequency;
        double windowDuration = 2.0 * analysisWidth;   // gaussian window
        double fmin_mel = 0.0;
        double fbottom = NUMhertzToMel2 (100.0), fceiling = NUMhertzToMel2 (nyquist);
        long numberOfFrames;

        // Check defaults.
        if (fmax_mel <= 0.0 || fmax_mel > fceiling) {
            fmax_mel = fceiling;
        }
        if (fmax_mel <= f1_mel) {
            f1_mel = fbottom;
            fmax_mel = fceiling;
        }
        if (f1_mel <= 0.0) {
            f1_mel = fbottom;
        }
        if (df_mel <= 0.0) {
            df_mel = 100.0;
        }

        // Determine the number of filters; a filter distance that is large compared
        // with the frequency range rounds to zero filters, which is rejected here.
        long numberOfFilters = lround ((fmax_mel - f1_mel) / df_mel);
        if (numberOfFilters <= 0) {
            Melder_throw (U"The combination of filter parameters is not valid.");
        }
        fmax_mel = f1_mel + numberOfFilters * df_mel;

        Sampled_shortTermAnalysis (me, windowDuration, dt, &numberOfFrames, &t1);
        autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
        autoMelSpectrogram thee = MelSpectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1,
            fmin_mel, fmax_mel, numberOfFilters, df_mel, f1_mel);

        autoMelderProgress progress (U"MelSpectrograms analysis");

        for (long iframe = 1; iframe <= numberOfFrames; iframe++) {
            double t = Sampled_indexToX (thee.get(), iframe);
            // Copy the frame centred on t and apply the Gaussian window.
            Sound_into_Sound (me, sframe.get(), t - windowDuration / 2.0);
            Sounds_multiply (sframe.get(), window.get());
            Sound_into_MelSpectrogram_frame (sframe.get(), thee.get(), iframe);

            // Refresh the progress display on frames 1, 11, 21, ...
            if (iframe % 10 == 1) {
                Melder_progress ((double) iframe / numberOfFrames,
                    U"Frame ", iframe, U" out of ", numberOfFrames, U".");
            }
        }

        _Spectrogram_windowCorrection ((Spectrogram) thee.get(), window -> nx);
        return thee;
    } catch (MelderError) {
        Melder_throw (me, U": no MelSpectrogram created.");
    }
}
/*
 * Build a PowerCepstrogram from a Sound.
 *
 * The window duration is 2 * (3 / pitchFloor), clipped to the duration of the
 * sound (my dx * my nx). The sound is resampled to 2 * maximumFrequency and
 * pre-emphasized; each frame has its mean removed, is multiplied by a Gaussian
 * window and is converted through Sound_to_Spectrum and
 * Spectrum_to_PowerCepstrum. The first row of every frame cepstrum is copied
 * into one column of the result.
 *
 * Any error is re-thrown with ": no PowerCepstrogram created." appended.
 */
PowerCepstrogram Sound_to_PowerCepstrogram (Sound me, double pitchFloor, double dt,
    double maximumFrequency, double preEmphasisFrequency)
{
    try {
        // minimum analysis window has 3 periods of lowest pitch
        double analysisWidth = 3 / pitchFloor;
        double windowDuration = 2 * analysisWidth;   // gaussian window
        long numberOfFrames;

        // Convenience: a window longer than the sound shrinks to the whole sound.
        if (windowDuration > my dx * my nx) {
            windowDuration = my dx * my nx;
        }

        double firstFrameTime;
        double samplingFrequency = 2 * maximumFrequency;
        autoSound resampled = Sound_resample (me, samplingFrequency, 50);
        Sound_preEmphasis (resampled.peek(), preEmphasisFrequency);
        Sampled_shortTermAnalysis (me, windowDuration, dt, & numberOfFrames, & firstFrameTime);

        autoSound analysisFrame = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoSound gaussianWindow = Sound_createGaussian (windowDuration, samplingFrequency);

        // FFT size: the first power of two (at least 2) that is not below the frame length.
        long nfft;
        for (nfft = 2; nfft < analysisFrame -> nx; nfft *= 2) {
            ;
        }
        long numberOfQuefrencies = nfft / 2 + 1;
        double qmax = 0.5 * nfft / samplingFrequency;
        double dq = qmax / (numberOfQuefrencies - 1);

        autoPowerCepstrogram thee = PowerCepstrogram_create (my xmin, my xmax, numberOfFrames, dt,
            firstFrameTime, 0, qmax, numberOfQuefrencies, dq, 0);

        autoMelderProgress progress (L"Cepstrogram analysis");

        for (long iframe = 1; iframe <= numberOfFrames; iframe++) {
            double frameCentre = Sampled_indexToX (thee.peek(), iframe);
            Sound_into_Sound (resampled.peek(), analysisFrame.peek(), frameCentre - windowDuration / 2);
            Vector_subtractMean (analysisFrame.peek());
            Sounds_multiply (analysisFrame.peek(), gaussianWindow.peek());

            autoSpectrum spectrum = Sound_to_Spectrum (analysisFrame.peek(), 1);   // FFT yes
            autoPowerCepstrum cepstrum = Spectrum_to_PowerCepstrum (spectrum.peek());

            // One frame of the result is one column of thy z.
            for (long iq = 1; iq <= numberOfQuefrencies; iq++) {
                thy z[iq][iframe] = cepstrum -> z[1][iq];
            }

            if ((iframe % 10) == 1) {
                Melder_progress ((double) iframe / numberOfFrames, L"PowerCepstrogram analysis of frame ",
                    Melder_integer (iframe), L" out of ", Melder_integer (numberOfFrames), L".");
            }
        }
        return thee.transfer();
    } catch (MelderError) {
        Melder_throw (me, ": no PowerCepstrogram created.");
    }
}
/*
 * Build a Cepstrogram from a Sound.
 *
 * The window duration is 2 * analysisWidth, clipped to the duration of the
 * sound (my dx * my nx). The sound is resampled to 2 * maximumFrequency; each
 * frame has its mean removed, is multiplied by a Gaussian window and is
 * converted through Sound_to_Spectrum and Spectrum_to_Cepstrum. The
 * quefrency-axis layout of the result is taken from a trial conversion of the
 * analysis frame as it is right after creation.
 *
 * Any error is re-thrown with ": no Cepstrogram created." appended.
 */
Cepstrogram Sound_to_Cepstrogram (Sound me, double analysisWidth, double dt, double maximumFrequency)
{
    try {
        double windowDuration = 2 * analysisWidth;   // gaussian window
        long numberOfFrames;

        // Convenience: a window longer than the sound shrinks to the whole sound.
        if (windowDuration > my dx * my nx) {
            windowDuration = my dx * my nx;
        }

        double firstFrameTime;
        double samplingFrequency = 2 * maximumFrequency;
        autoSound resampled = Sound_resample (me, samplingFrequency, 50);
        Sampled_shortTermAnalysis (me, windowDuration, dt, & numberOfFrames, & firstFrameTime);

        autoSound analysisFrame = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoSound gaussianWindow = Sound_createGaussian (windowDuration, samplingFrequency);

        double qmin, qmax, dq, q1;
        long numberOfQuefrencies;
        {
            // laziness: run one conversion just to learn the proper dimensions
            autoSpectrum trialSpectrum = Sound_to_Spectrum (analysisFrame.peek(), 1);
            autoCepstrum trialCepstrum = Spectrum_to_Cepstrum (trialSpectrum.peek());
            qmin = trialCepstrum -> xmin;
            qmax = trialCepstrum -> xmax;
            dq = trialCepstrum -> dx;
            q1 = trialCepstrum -> x1;
            numberOfQuefrencies = trialCepstrum -> nx;
        }

        autoCepstrogram thee = Cepstrogram_create (my xmin, my xmax, numberOfFrames, dt,
            firstFrameTime, qmin, qmax, numberOfQuefrencies, dq, q1);

        autoMelderProgress progress (L"Cepstrogram analysis");

        for (long iframe = 1; iframe <= numberOfFrames; iframe++) {
            double frameCentre = Sampled_indexToX (thee.peek(), iframe);
            Sound_into_Sound (resampled.peek(), analysisFrame.peek(), frameCentre - windowDuration / 2);
            Vector_subtractMean (analysisFrame.peek());
            Sounds_multiply (analysisFrame.peek(), gaussianWindow.peek());

            autoSpectrum spectrum = Sound_to_Spectrum (analysisFrame.peek(), 1);
            autoCepstrum cepstrum = Spectrum_to_Cepstrum (spectrum.peek());

            // One frame of the result is one column of thy z.
            for (long iq = 1; iq <= numberOfQuefrencies; iq++) {
                thy z[iq][iframe] = cepstrum -> z[1][iq];
            }

            if ((iframe % 10) == 1) {
                Melder_progress ((double) iframe / numberOfFrames, L"Cepstrogram analysis of frame ",
                    Melder_integer (iframe), L" out of ", Melder_integer (numberOfFrames), L".");
            }
        }
        return thee.transfer();
    } catch (MelderError) {
        Melder_throw (me, ": no Cepstrogram created.");
    }
}
/*
 * Sound_to_BarkFilter: short-term filter-bank analysis of a Sound into a BarkFilter.
 *
 * Non-positive f1_bark, fmax_bark and df_bark are replaced by 1, zmax and 1, where
 * zmax = NUMhertzToBark2 (nyquist); fmax_bark is then capped at zmax. Throws when the
 * rounded number of filters (nf) is not positive.
 * Each frame of duration 2 * analysisWidth is multiplied by a Gaussian window and passed
 * to Sound_into_BarkFilter_frame; frames for which that call reports failure are counted
 * and summarised in a single warning. Finally thy z is converted by NUMdmatrix_to_dBs
 * using a reference scaled by gaussian_window_squared_correction (window -> nx).
 *
 * NOTE(review): in this view the definition stops right after `catch (MelderError) {` --
 * the handler body and the closing braces are not visible -- and a stray `therror` token
 * follows the Melder_progress call. Both should be checked against the complete file.
 */
BarkFilter Sound_to_BarkFilter (Sound me, double analysisWidth, double dt, double f1_bark, double fmax_bark, double df_bark) { try { double t1, nyquist = 0.5 / my dx, samplingFrequency = 2 * nyquist; double windowDuration = 2 * analysisWidth; /* gaussian window */ double zmax = NUMhertzToBark2 (nyquist); double fmin_bark = 0; long nt, frameErrorCount = 0; // Check defaults. if (f1_bark <= 0) { f1_bark = 1; } if (fmax_bark <= 0) { fmax_bark = zmax; } if (df_bark <= 0) { df_bark = 1; } fmax_bark = MIN (fmax_bark, zmax); long nf = floor ( (fmax_bark - f1_bark) / df_bark + 0.5); if (nf <= 0) { Melder_throw ("The combination of filter parameters is not valid."); } Sampled_shortTermAnalysis (me, windowDuration, dt, & nt, & t1); autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency); autoSound window = Sound_createGaussian (windowDuration, samplingFrequency); autoBarkFilter thee = BarkFilter_create (my xmin, my xmax, nt, dt, t1, fmin_bark, fmax_bark, nf, df_bark, f1_bark); autoMelderProgress progess (L"BarkFilter analysis"); for (long i = 1; i <= nt; i++) { double t = Sampled_indexToX (thee.peek(), i); Sound_into_Sound (me, sframe.peek(), t - windowDuration / 2); Sounds_multiply (sframe.peek(), window.peek()); if (! Sound_into_BarkFilter_frame (sframe.peek(), thee.peek(), i)) { frameErrorCount++; } if ( (i % 10) == 1) { Melder_progress ( (double) i / nt, L"BarkFilter analysis: frame ", Melder_integer (i), L" from ", Melder_integer (nt), L"."); therror } } if (frameErrorCount > 0) { Melder_warning (L"Analysis results of ", Melder_integer (frameErrorCount), L" frame(s) out of ", Melder_integer (nt), L" will be suspect."); } double ref = FilterBank_DBREF * gaussian_window_squared_correction (window -> nx); NUMdmatrix_to_dBs (thy z, 1, thy ny, 1, thy nx, ref, FilterBank_DBFAC, FilterBank_DBFLOOR); return thee.transfer(); } catch (MelderError) {
/*
 * Build a FormantFilter from a Sound, with filter bandwidths that follow a Pitch.
 *
 * For every frame the bandwidth passed to Sound_into_FormantFilter_frame is
 * relative_bw * f0, where f0 is the Pitch value at the frame centre, or the
 * median pitch when that value is undefined or zero. When the median itself is
 * undefined or zero it is fixed at 100 and a warning is issued.
 * Defaults for non-positive arguments: f1_hz = 100, fmax_hz = nyquist,
 * df_hz = f0_median / 2, relative_bw = 1.1; fmax_hz is capped at nyquist.
 *
 * Throws when the Sound's time domain is not contained in the Pitch's domain
 * or when the rounded number of filters is not positive. All errors are
 * replaced by the single message in the catch handler. The result is handed
 * out of its auto-wrapper through transfer().
 */
FormantFilter Sound_and_Pitch_to_FormantFilter (Sound me, Pitch thee, double analysisWidth, double dt,
    double f1_hz, double fmax_hz, double df_hz, double relative_bw)
{
    try {
        double t1, windowDuration = 2 * analysisWidth;   /* gaussian window */
        double nyquist = 0.5 / my dx, samplingFrequency = 2 * nyquist, fmin_hz = 0;
        long nt, f0_undefined = 0;

        // "Included" means thy xmin <= my xmin and my xmax <= thy xmax. The left-hand
        // comparison used to be inverted: it rejected sounds that start after the Pitch
        // and accepted sounds that start before it.
        if (my xmin < thy xmin || my xmax > thy xmax)
            Melder_throw ("The domain of the Sound is not included in the domain of the Pitch.");

        double f0_median = Pitch_getQuantile (thee, thy xmin, thy xmax, 0.5, kPitch_unit_HERTZ);
        if (f0_median == NUMundefined || f0_median == 0) {
            f0_median = 100;
            Melder_warning (L"Pitch values undefined. Bandwith fixed to 100 Hz. ");
        }

        // Check defaults.
        if (f1_hz <= 0) {
            f1_hz = 100;
        }
        if (fmax_hz <= 0) {
            fmax_hz = nyquist;
        }
        if (df_hz <= 0) {
            df_hz = f0_median / 2;
        }
        if (relative_bw <= 0) {
            relative_bw = 1.1;
        }
        fmax_hz = MIN (fmax_hz, nyquist);

        // Same guard as in Sound_to_BarkFilter: a filter bank needs at least one filter.
        long nf = floor ( (fmax_hz - f1_hz) / df_hz + 0.5);
        if (nf <= 0) {
            Melder_throw ("The combination of filter parameters is not valid.");
        }

        Sampled_shortTermAnalysis (me, windowDuration, dt, &nt, &t1);
        autoFormantFilter him = FormantFilter_create (my xmin, my xmax, nt, dt, t1,
            fmin_hz, fmax_hz, nf, df_hz, f1_hz);

        // Temporary objects
        autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);

        autoMelderProgress progress (L"Sound & Pitch: To FormantFilter");

        for (long i = 1; i <= nt; i++) {
            double t = Sampled_indexToX (him.peek(), i);
            double b, f0 = Pitch_getValueAtTime (thee, t, kPitch_unit_HERTZ, 0);

            // Frames without a usable pitch fall back on the median.
            // NOTE(review): f0_undefined is counted but never reported.
            if (f0 == NUMundefined || f0 == 0) {
                f0_undefined++;
                f0 = f0_median;
            }
            b = relative_bw * f0;

            Sound_into_Sound (me, sframe.peek(), t - windowDuration / 2);
            Sounds_multiply (sframe.peek(), window.peek());
            Sound_into_FormantFilter_frame (sframe.peek(), him.peek(), i, b);

            // Refresh the progress display on frames 1, 11, 21, ...
            if ( (i % 10) == 1) {
                Melder_progress ( (double) i / nt, L"Frame ", Melder_integer (i),
                    L" out of ", Melder_integer (nt), L".");
            }
        }

        // Window correction and conversion to dB.
        double ref = FilterBank_DBREF * gaussian_window_squared_correction (window -> nx);
        NUMdmatrix_to_dBs (his z, 1, his ny, 1, his nx, ref, FilterBank_DBFAC, FilterBank_DBFLOOR);
        return him.transfer();
    } catch (MelderError) {
        // The inputs are a Sound and a Pitch; the old text named "Pitch & FormantFilter".
        Melder_throw ("FormantFilter not created from Sound & Pitch.");
    }
}
/*
 * Short-term filter-bank analysis of a Sound into a MelFilter.
 *
 * Defaults: fmax_mel outside (0, fceiling] becomes fceiling; when fmax_mel does
 * not exceed f1_mel both are reset to fbottom and fceiling; a non-positive
 * f1_mel becomes fbottom and a non-positive df_mel becomes 100, where
 * fbottom = HZTOMEL (100.0) and fceiling = HZTOMEL (nyquist).
 * fmax_mel is then moved to f1_mel + nf * df_mel.
 *
 * Throws when the rounded number of filters is not positive (the same guard
 * as in Sound_to_BarkFilter). Frames for which Sound_into_MelFilter_frame
 * reports failure are counted and summarised in one warning. Any error is
 * re-thrown with ": no MelFilter created." appended. The result is handed out
 * of its auto-wrapper through transfer().
 */
MelFilter Sound_to_MelFilter (Sound me, double analysisWidth, double dt,
    double f1_mel, double fmax_mel, double df_mel)
{
    try {
        double t1, samplingFrequency = 1 / my dx, nyquist = 0.5 * samplingFrequency;
        double windowDuration = 2 * analysisWidth;   /* gaussian window */
        double fmin_mel = 0;
        double fbottom = HZTOMEL (100.0), fceiling = HZTOMEL (nyquist);
        long nt, frameErrorCount = 0;

        // Check defaults.
        if (fmax_mel <= 0 || fmax_mel > fceiling) {
            fmax_mel = fceiling;
        }
        if (fmax_mel <= f1_mel) {
            f1_mel = fbottom;
            fmax_mel = fceiling;
        }
        if (f1_mel <= 0) {
            f1_mel = fbottom;
        }
        if (df_mel <= 0) {
            df_mel = 100.0;
        }

        // Determine the number of filters; a filter distance that is large compared
        // with the frequency range rounds to zero filters, which is rejected here.
        long nf = floor ( (fmax_mel - f1_mel) / df_mel + 0.5);
        if (nf <= 0) {
            Melder_throw ("The combination of filter parameters is not valid.");
        }
        fmax_mel = f1_mel + nf * df_mel;

        Sampled_shortTermAnalysis (me, windowDuration, dt, &nt, &t1);
        autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
        autoMelFilter thee = MelFilter_create (my xmin, my xmax, nt, dt, t1,
            fmin_mel, fmax_mel, nf, df_mel, f1_mel);

        autoMelderProgress progress (L"MelFilters analysis");

        for (long i = 1; i <= nt; i++) {
            double t = Sampled_indexToX (thee.peek(), i);
            // Copy the frame centred on t and apply the Gaussian window.
            Sound_into_Sound (me, sframe.peek(), t - windowDuration / 2);
            Sounds_multiply (sframe.peek(), window.peek());
            if (! Sound_into_MelFilter_frame (sframe.peek(), thee.peek(), i)) {
                frameErrorCount++;
            }
            // Refresh the progress display on frames 1, 11, 21, ...
            if ( (i % 10) == 1) {
                Melder_progress ( (double) i / nt, L"Frame ", Melder_integer (i),
                    L" out of ", Melder_integer (nt), L".");
            }
        }

        if (frameErrorCount)
            Melder_warning (L"Analysis results of ", Melder_integer (frameErrorCount),
                L" frame(s) out of ", Melder_integer (nt), L" will be suspect.");

        // Window correction.
        double ref = FilterBank_DBREF * gaussian_window_squared_correction (window -> nx);
        NUMdmatrix_to_dBs (thy z, 1, thy ny, 1, thy nx, ref, FilterBank_DBFAC, FilterBank_DBFLOOR);
        return thee.transfer();
    } catch (MelderError) {
        Melder_throw (me, ": no MelFilter created.");
    }
}
/*
 * Build a Spectrogram from a Sound, with filter bandwidths that follow a Pitch.
 *
 * For every frame the bandwidth passed to Sound_into_Spectrogram_frame is
 * relative_bw * f0, where f0 is the Pitch value at the frame centre, or the
 * median pitch when that value is undefined or zero. When the median itself is
 * undefined or zero it is fixed at 100 and a warning is issued.
 * Defaults for non-positive arguments: f1_hz = 100, fmax_hz = nyquist,
 * df_hz = f0_median / 2, relative_bw = 1.1; fmax_hz is capped at nyquist.
 *
 * Throws when the Sound's time domain is not contained in the Pitch's domain
 * or when the rounded number of filters is not positive. All errors are
 * replaced by the single message in the catch handler.
 */
autoSpectrogram Sound_and_Pitch_to_Spectrogram (Sound me, Pitch thee, double analysisWidth, double dt,
    double f1_hz, double fmax_hz, double df_hz, double relative_bw)
{
    try {
        double t1, windowDuration = 2.0 * analysisWidth;   /* gaussian window */
        double nyquist = 0.5 / my dx, samplingFrequency = 2.0 * nyquist, fmin_hz = 0.0;
        long numberOfFrames, f0_undefined = 0;   // integer counter (was initialised with 0.0)

        // "Included" means thy xmin <= my xmin and my xmax <= thy xmax. The left-hand
        // comparison used to be inverted: it rejected sounds that start after the Pitch
        // and accepted sounds that start before it.
        if (my xmin < thy xmin || my xmax > thy xmax)
            Melder_throw (U"The domain of the Sound is not included in the domain of the Pitch.");

        double f0_median = Pitch_getQuantile (thee, thy xmin, thy xmax, 0.5, kPitch_unit_HERTZ);
        if (f0_median == NUMundefined || f0_median == 0.0) {
            f0_median = 100.0;
            Melder_warning (U"Pitch values undefined. Bandwith fixed to 100 Hz. ");
        }

        // Check defaults.
        if (f1_hz <= 0.0) {
            f1_hz = 100.0;
        }
        if (fmax_hz <= 0.0) {
            fmax_hz = nyquist;
        }
        if (df_hz <= 0.0) {
            df_hz = f0_median / 2.0;
        }
        if (relative_bw <= 0.0) {
            relative_bw = 1.1;
        }
        fmax_hz = MIN (fmax_hz, nyquist);

        // Same guard as in Sound_to_BarkSpectrogram: at least one filter is needed.
        long numberOfFilters = lround ( (fmax_hz - f1_hz) / df_hz);
        if (numberOfFilters <= 0) {
            Melder_throw (U"The combination of filter parameters is not valid.");
        }

        Sampled_shortTermAnalysis (me, windowDuration, dt, &numberOfFrames, &t1);
        autoSpectrogram him = Spectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1,
            fmin_hz, fmax_hz, numberOfFilters, df_hz, f1_hz);

        // Temporary objects
        autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);

        // The title used to say "To FormantFilter" although a Spectrogram is built here.
        autoMelderProgress progress (U"Sound & Pitch: To Spectrogram");

        for (long iframe = 1; iframe <= numberOfFrames; iframe++) {
            double t = Sampled_indexToX (him.get(), iframe);
            double b, f0 = Pitch_getValueAtTime (thee, t, kPitch_unit_HERTZ, 0);

            // Frames without a usable pitch fall back on the median.
            // NOTE(review): f0_undefined is counted but never reported.
            if (f0 == NUMundefined || f0 == 0.0) {
                f0_undefined ++;
                f0 = f0_median;
            }
            b = relative_bw * f0;

            Sound_into_Sound (me, sframe.get(), t - windowDuration / 2.0);
            Sounds_multiply (sframe.get(), window.get());
            Sound_into_Spectrogram_frame (sframe.get(), him.get(), iframe, b);

            // Refresh the progress display on frames 1, 11, 21, ...
            if (iframe % 10 == 1) {
                Melder_progress ( (double) iframe / numberOfFrames,
                    U"Frame ", iframe, U" out of ", numberOfFrames, U".");
            }
        }

        _Spectrogram_windowCorrection (him.get(), window -> nx);
        return him;
    } catch (MelderError) {
        // The old text ("FormantFilter not created from Pitch & FormantFilter.") named
        // neither the inputs nor the result of this function.
        Melder_throw (U"Spectrogram not created from Sound & Pitch.");
    }
}
/*
 * Build a SPINET from a Sound (error-code / goto-cleanup version).
 *
 * Steps:
 *   1. derive centre frequencies and bandwidths of a gammatone filter bank
 *      from the ERB axis of the result (thy y1, thy dy);
 *   2. per filter: convolve the sound with a gammatone, then store the
 *      windowed frame power, scaled by bb / gammaMaxAmplitude, in thy y;
 *   3. accumulate the excitatory and inhibitory area sums per filter;
 *   4. per frame and filter: combine thy y over all filters with the
 *      normalised excitation-minus-inhibition weights and store the
 *      non-negative part in thy s.
 *
 * timeStep is raised to my dx and maximumFrequencyHz is capped at half the
 * sampling frequency. Every exit path runs the cleanup section; when
 * Melder_hasError() is set the result is forgotten and the value of
 * Melder_errorp1 is returned instead.
 */
SPINET Sound_to_SPINET (Sound me, double timeStep, double windowDuration,
    double minimumFrequencyHz, double maximumFrequencyHz, long nFilters,
    double excitationErbProportion, double inhibitionErbProportion)
{
    // All locals that live across a 'goto cleanup' are declared up front.
    Sound gaussWindow = NULL, segment = NULL;
    SPINET thee = NULL;
    long ifilter, iframe, kfilter, numberOfFrames;
    double firstTime, b = 1.02, samplingFrequency = 1 / my dx;
    double *centreFreq = NULL, *bandwidth = NULL, *areaEx = NULL, *areaIn = NULL;

    if (timeStep < my dx) {
        timeStep = my dx;
    }
    if (maximumFrequencyHz > samplingFrequency / 2) {
        maximumFrequencyHz = samplingFrequency / 2;
    }

    // Acquire everything in a fixed order; the first failure jumps to the cleanup.
    if (! Sampled_shortTermAnalysis (me, windowDuration, timeStep, &numberOfFrames, &firstTime)) goto cleanup;
    thee = SPINET_create (my xmin, my xmax, numberOfFrames, timeStep, firstTime,
        minimumFrequencyHz, maximumFrequencyHz, nFilters,
        excitationErbProportion, inhibitionErbProportion);
    if (! thee) goto cleanup;
    gaussWindow = Sound_createGaussian (windowDuration, samplingFrequency);
    if (! gaussWindow) goto cleanup;
    segment = Sound_createSimple (1, windowDuration, samplingFrequency);
    if (! segment) goto cleanup;
    centreFreq = NUMdvector (1, nFilters);
    if (! centreFreq) goto cleanup;
    bandwidth = NUMdvector (1, nFilters);
    if (! bandwidth) goto cleanup;
    areaEx = NUMdvector (1, nFilters);
    if (! areaEx) goto cleanup;
    areaIn = NUMdvector (1, nFilters);
    if (! areaIn) goto cleanup;

    /* Cochlear filterbank: gammatone */
    for (ifilter = 1; ifilter <= nFilters; ifilter++) {
        centreFreq[ifilter] = NUMerbToHertz (thy y1 + (ifilter - 1) * thy dy);
        bandwidth[ifilter] = 2 * NUMpi * b *
            (centreFreq[ifilter] * (6.23e-6 * centreFreq[ifilter] + 93.39e-3) + 28.52);
    }

    Melder_progress1 (0.0, L"SPINET analysis");

    for (ifilter = 1; ifilter <= nFilters; ifilter++) {
        Sound gammaTone = NULL, filtered = NULL;
        /* Contribution of outer & middle ear and phase locking */
        double bb = (centreFreq[ifilter] / 1000) * exp (- centreFreq[ifilter] / 1000);
        /* Time where gammafunction envelope has its maximum */
        double tgammaMax = (thy gamma - 1) / bandwidth[ifilter];
        /* Amplitude at tgammaMax */
        double gammaMaxAmplitude = pow ((thy gamma - 1) / (NUMe * bandwidth[ifilter]), (thy gamma - 1));
        double timeCorrection = tgammaMax - windowDuration / 2;

        /* filtering can be made 30% faster by taking Spectrum(me) outside the loop */
        gammaTone = Sound_createGammaTone (0, 0.1, samplingFrequency, thy gamma, b,
            centreFreq[ifilter], 0, 0, 0);
        if (gammaTone) {
            filtered = Sounds_convolve (me, gammaTone, kSounds_convolve_scaling_SUM,
                kSounds_convolve_signalOutsideTimeDomain_ZERO);
        }
        if (! filtered) {
            forget (gammaTone);
            goto cleanup;
        }

        /* To energy measure: weigh with broad-band transfer function */
        for (iframe = 1; iframe <= numberOfFrames; iframe++) {
            Sound_into_Sound (filtered, segment, Sampled_indexToX (thee, iframe) + timeCorrection);
            Sounds_multiply (segment, gaussWindow);
            thy y[ifilter][iframe] = Sound_power (segment) * bb / gammaMaxAmplitude;
        }
        forget (filtered);
        forget (gammaTone);

        if (! Melder_progress5 ((double)ifilter / nFilters, L"SPINET: filter ", Melder_integer (ifilter),
            L" from ", Melder_integer (nFilters), L".")) goto cleanup;
    }

    /* Excitatory and inhibitory area functions */
    for (ifilter = 1; ifilter <= nFilters; ifilter++) {
        for (kfilter = 1; kfilter <= nFilters; kfilter++) {
            double fr = (centreFreq[kfilter] - centreFreq[ifilter]) / bandwidth[ifilter];
            areaEx[ifilter] += fgamma (fr / thy excitationErbProportion, thy gamma);
            areaIn[ifilter] += fgamma (fr / thy inhibitionErbProportion, thy gamma);
        }
    }

    /* On-center off-surround interactions */
    for (iframe = 1; iframe <= numberOfFrames; iframe++) {
        for (ifilter = 1; ifilter <= nFilters; ifilter++) {
            double a = 0;
            for (kfilter = 1; kfilter <= nFilters; kfilter++) {
                double fr = (centreFreq[kfilter] - centreFreq[ifilter]) / bandwidth[ifilter];
                double hexsq = fgamma (fr / thy excitationErbProportion, thy gamma);
                double hinsq = fgamma (fr / thy inhibitionErbProportion, thy gamma);
                a += thy y[kfilter][iframe] * (hexsq / areaEx[ifilter] - hinsq / areaIn[ifilter]);
            }
            // Only the positive part of the interaction is kept.
            if (a > 0) {
                thy s[ifilter][iframe] = a;
            } else {
                thy s[ifilter][iframe] = 0;
            }
        }
    }

    Melder_progress1 (1.0, NULL);

cleanup:
    NUMdvector_free (areaEx, 1);
    NUMdvector_free (areaIn, 1);
    NUMdvector_free (centreFreq, 1);
    NUMdvector_free (bandwidth, 1);
    forget (gaussWindow);
    forget (segment);
    if (! Melder_hasError()) return thee;
    forget (thee);
    return Melder_errorp1 (L"Sound_to_SPINET: not performed.");
}
/*
 * Refine an existing LPC analysis of a Sound frame by frame with
 * LPC_Frames_and_Sound_huber.
 *
 * The Sound and the LPC must agree in time domain and sampling interval, and
 * a short-term analysis with window duration 2 * analysisWidth and the LPC's
 * time step must reproduce the LPC's number of frames and first frame time;
 * otherwise an error is thrown. The refined frames are written into a copy of
 * 'thee', which is handed out of its auto-wrapper through transfer().
 *
 * Frames for which the Huber step throws are counted and summarised in one
 * warning. The total and average number of iterations are written with
 * MelderInfo_writeLine4. The Huber work structure is destroyed on both the
 * normal and the error path.
 */
LPC LPC_and_Sound_to_LPC_robust (LPC thee, Sound me, double analysisWidth, double preEmphasisFrequency,
    double k, int itermax, double tol, int wantlocation)
{
    struct huber_struct huber = { 0 };
    try {
        double firstFrameTime;
        double samplingFrequency = 1.0 / my dx;
        double tol_svd = 0.000001;
        double location = 0;
        double windowDuration = 2 * analysisWidth;   // Gaussian window
        long numberOfFrames;
        long failedFrames = 0;
        long totalIterations = 0;
        long order = thy maxnCoefficients;

        // The Sound and the LPC have to describe the same stretch of time.
        if (! (my xmin == thy xmin && my xmax == thy xmax)) {
            Melder_throw ("Time domains differ.");
        }
        if (my dx != thy samplingPeriod) {
            Melder_throw ("Sampling intervals differ.");
        }
        if (floor (windowDuration / my dx) < order + 1) {
            Melder_throw ("Analysis window too short.");
        }

        // The framing implied by analysisWidth must match the framing of the LPC.
        Sampled_shortTermAnalysis (me, windowDuration, thy dx, & numberOfFrames, & firstFrameTime);
        if (numberOfFrames != thy nx || firstFrameTime != thy x1) {
            Melder_throw ("Incorrect retrieved analysis width");
        }

        autoSound emphasized = Data_copy (me);
        autoSound analysisFrame = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoSound gaussianWindow = Sound_createGaussian (windowDuration, samplingFrequency);
        autoLPC him = Data_copy (thee);

        huber_struct_init (&huber, windowDuration, order, samplingFrequency, location, wantlocation);
        huber.k = k;
        huber.tol = tol;
        huber.tol_svd = tol_svd;
        huber.itermax = itermax;

        autoMelderProgress progress (L"LPC analysis");

        Sound_preEmphasis (emphasized.peek(), preEmphasisFrequency);

        for (long iframe = 1; iframe <= numberOfFrames; iframe++) {
            LPC_Frame sourceFrame = (LPC_Frame) & thy d_frames[iframe];
            LPC_Frame targetFrame = (LPC_Frame) & his d_frames[iframe];
            double frameCentre = Sampled_indexToX (thee, iframe);

            Sound_into_Sound (emphasized.peek(), analysisFrame.peek(), frameCentre - windowDuration / 2);
            Vector_subtractMean (analysisFrame.peek());
            Sounds_multiply (analysisFrame.peek(), gaussianWindow.peek());

            // A failing frame is only counted; the analysis goes on with the next frame.
            try {
                LPC_Frames_and_Sound_huber (sourceFrame, analysisFrame.peek(), targetFrame, & huber);
            } catch (MelderError) {
                failedFrames++;
            }

            totalIterations += huber.iter;

            if ( (iframe % 10) == 1) {
                Melder_progress ( (double) iframe / numberOfFrames, L"LPC analysis of frame ",
                    Melder_integer (iframe), L" out of ", Melder_integer (numberOfFrames), L".");
            }
        }

        if (failedFrames) {
            Melder_warning (L"Results of ", Melder_integer (failedFrames), L" frame(s) out of ",
                Melder_integer (numberOfFrames), L" could not be optimised.");
        }
        MelderInfo_writeLine4 (L"Number of iterations: ", Melder_integer (totalIterations),
            L"\n Average per frame: ", Melder_double (((double) totalIterations) / numberOfFrames));

        huber_struct_destroy (&huber);
        return him.transfer();
    } catch (MelderError) {
        huber_struct_destroy (&huber);
        Melder_throw (me, ": no robust LPC created.");
    }
}
/*
 * Pitch analysis of a Sound by subharmonic summation.
 *
 * The sound is resampled to 2 * maximumFrequency. For every frame (window
 * duration 2 / minimumPitch, Hamming-windowed) the amplitude spectrum is
 * enhanced and smoothed, interpolated by cubic splines onto a log2 frequency
 * axis with nPointsPerOctave points per octave, weighted with an arctangent
 * function, and summed over maxnSubharmonics + 1 shifted copies, each copy
 * weighted with the next power of compressionFactor. Local maxima of that sum
 * become pitch candidates (at most maxnCandidates, plus the voiceless
 * candidate). Candidate strengths are finally resized by
 * Pitch_Frame_resizeStrengths with a per-frame correlation coefficient and
 * the criterion 0.52.
 *
 * Any error is re-thrown with ": no Pitch (shs) created." appended. The
 * result is handed out of its auto-wrapper through transfer().
 */
Pitch Sound_to_Pitch_shs (Sound me, double timeStep, double minimumPitch, double maximumFrequency,
    double ceiling, long maxnSubharmonics, long maxnCandidates, double compressionFactor,
    long nPointsPerOctave)
{
    try {
        double firstTime;
        double newSamplingFrequency = 2 * maximumFrequency;
        double windowDuration = 2 / minimumPitch;
        double halfWindow = windowDuration / 2;
        double atans = nPointsPerOctave * NUMlog2 (65.0 / 50.0) - 1;

        // Number of speech samples in the downsampled signal in each frame:
        // 100 for windowDuration == 0.04 and newSamplingFrequency == 2500
        long samplesPerFrame = lround (windowDuration * newSamplingFrequency);

        // FFT size: the first power of two that is not below the frame length and is at least 256.
        long nfft = 2;
        while (nfft < samplesPerFrame || nfft <= 128) {
            nfft *= 2;
        }
        long nSpectrumPoints = nfft / 2 + 1;
        double frameDuration = nfft / newSamplingFrequency;
        double df = newSamplingFrequency / nfft;

        // The number of points on the octave scale
        double fminl2 = NUMlog2 (minimumPitch);
        double fmaxl2 = NUMlog2 (maximumFrequency);
        long nFrequencyPoints = (long) floor ((fmaxl2 - fminl2) * nPointsPerOctave);
        double dfl2 = (fmaxl2 - fminl2) / (nFrequencyPoints - 1);

        autoSound sound = Sound_resample (me, newSamplingFrequency, 50);
        long numberOfFrames;
        Sampled_shortTermAnalysis (sound.peek(), windowDuration, timeStep, &numberOfFrames, &firstTime);
        autoSound fftFrame = Sound_createSimple (1, frameDuration, newSamplingFrequency);
        autoSound hamming = Sound_createHamming (samplesPerFrame / newSamplingFrequency, newSamplingFrequency);
        autoPitch thee = Pitch_create (my xmin, my xmax, numberOfFrames, timeStep, firstTime,
            ceiling, maxnCandidates);

        autoNUMvector<double> cc (1, numberOfFrames);
        autoNUMvector<double> specAmp (1, nSpectrumPoints);
        autoNUMvector<double> fl2 (1, nSpectrumPoints);
        autoNUMvector<double> yv2 (1, nSpectrumPoints);
        autoNUMvector<double> arctg (1, nFrequencyPoints);
        autoNUMvector<double> al2 (1, nFrequencyPoints);

        Melder_assert (fftFrame->nx >= samplesPerFrame);
        Melder_assert (hamming->nx == samplesPerFrame);

        // Compute the absolute value of the globally largest amplitude w.r.t. the global mean.
        double globalMean, globalPeak;
        Sound_localMean (sound.peek(), sound -> xmin, sound -> xmax, &globalMean);
        Sound_localPeak (sound.peek(), sound -> xmin, sound -> xmax, globalMean, &globalPeak);

        /*
            The cubic spline interpolation needs the frequencies on an octave (log2) scale.
            All frequencies must be DIFFERENT, otherwise the interpolation gives corrupt results.
            Because log2 (0) is not defined, the first point is extrapolated with the
            heuristic f[2] - f[1] == f[3] - f[2].
        */
        for (long ibin = 2; ibin <= nSpectrumPoints; ibin++) {
            fl2[ibin] = NUMlog2 ( (ibin - 1) * df);
        }
        fl2[1] = 2 * fl2[2] - fl2[3];

        // The frequency weighting function on the regularly spaced log2 axis.
        for (long ipoint = 1; ipoint <= nFrequencyPoints; ipoint++) {
            arctg[ipoint] = 0.5 + atan (3 * (ipoint - atans) / nPointsPerOctave) / NUMpi;
        }

        // Perform the analysis on all frames.
        for (long iframe = 1; iframe <= numberOfFrames; iframe++) {
            Pitch_Frame pitchFrame = &thy frame[iframe];
            double subharmonicWeight = 1, f0, pitch_strength, localMean, localPeak;
            double tmid = Sampled_indexToX (thee.peek(), iframe);   // the centre of this frame
            long savedFrameLength = fftFrame -> nx;

            // Copy a frame from the sound, apply a hamming window. Get local 'intensity'.
            // Begin of hack: the frame temporarily pretends to be only samplesPerFrame long.
            fftFrame -> nx = samplesPerFrame;
            Sound_into_Sound (sound.peek(), fftFrame.peek(), tmid - halfWindow);
            Sounds_multiply (fftFrame.peek(), hamming.peek());
            Sound_localMean (sound.peek(), tmid - 3 * halfWindow, tmid + 3 * halfWindow, &localMean);
            Sound_localPeak (sound.peek(), tmid - halfWindow, tmid + halfWindow, localMean, &localPeak);
            if (localPeak > globalPeak) {
                pitchFrame -> intensity = 1;
            } else {
                pitchFrame -> intensity = localPeak / globalPeak;
            }
            fftFrame -> nx = savedFrameLength;
            // End of hack.

            // Get the Fourier spectrum.
            autoSpectrum spec = Sound_to_Spectrum (fftFrame.peek(), 1);
            Melder_assert (spec->nx == nSpectrumPoints);

            // From complex spectrum to amplitude spectrum.
            for (long ibin = 1; ibin <= nSpectrumPoints; ibin++) {
                double re = spec -> z[1][ibin], im = spec -> z[2][ibin];
                specAmp[ibin] = sqrt (re * re + im * im);
            }

            // Enhance the peaks in the spectrum.
            spec_enhance_SHS (specAmp.peek(), nSpectrumPoints);

            // Smooth the enhanced spectrum.
            spec_smoooth_SHS (specAmp.peek(), nSpectrumPoints);

            // Go to a logarithmic scale and perform cubic spline interpolation to get
            // spectral values for the increased number of frequency points.
            NUMspline (fl2.peek(), specAmp.peek(), nSpectrumPoints, 1e30, 1e30, yv2.peek());
            for (long ipoint = 1; ipoint <= nFrequencyPoints; ipoint++) {
                double f = fminl2 + (ipoint - 1) * dfl2;
                NUMsplint (fl2.peek(), specAmp.peek(), yv2.peek(), nSpectrumPoints, f, &al2[ipoint]);
            }

            // Multiply by frequency selectivity of the auditory system; non-positive values become 0.
            for (long ipoint = 1; ipoint <= nFrequencyPoints; ipoint++) {
                if (al2[ipoint] > 0) {
                    al2[ipoint] = al2[ipoint] * arctg[ipoint];
                } else {
                    al2[ipoint] = 0;
                }
            }

            // The subharmonic summation. Shift spectra in octaves and sum.
            Pitch_Frame_init (pitchFrame, maxnCandidates);
            autoNUMvector<double> sumspec (1, nFrequencyPoints);
            pitchFrame -> nCandidates = 0;   // !!!!! the candidates are added one by one below
            for (long m = 1; m <= maxnSubharmonics + 1; m++) {
                long kb = 1 + (long) floor (nPointsPerOctave * NUMlog2 (m));
                for (long kpoint = kb; kpoint <= nFrequencyPoints; kpoint++) {
                    sumspec[kpoint - kb + 1] += al2[kpoint] * subharmonicWeight;
                }
                subharmonicWeight *= compressionFactor;
            }

            // First register the voiceless candidate (always present).
            Pitch_Frame_addPitch (pitchFrame, 0, 0, maxnCandidates);

            /*
                The best local pitch estimates are the maxima of the subharmonic sum spectrum,
                located by parabolic interpolation on three points.
                A parabola with a maximum:  y(x) = a - b (x - c)^2  with a, b, c >= 0.
                For the three points (-x, y1), (0, y2) and (x, y3) the maximum a and its position c are
                    a = (2 y1 (4 y2 + y3) - y1^2 - (y3 - 4 y2)^2) / (8 (y1 - 2 y2 + y3))
                    c = dx (y1 - y3) / (2 (y1 - 2 y2 + y3))
                    (b = (2 y2 - y1 - y3) / (2 dx^2))
            */
            for (long kpoint = 2; kpoint <= nFrequencyPoints - 1; kpoint++) {
                double y1 = sumspec[kpoint - 1], y2 = sumspec[kpoint], y3 = sumspec[kpoint + 1];
                if (y2 > y1 && y2 >= y3) {
                    double denum = y1 - 2 * y2 + y3, tmp = y3 - 4 * y2;
                    double x = dfl2 * (y1 - y3) / (2 * denum);
                    double f = pow (2, fminl2 + (kpoint - 1) * dfl2 + x);
                    double strength = (2 * y1 * (4 * y2 + y3) - y1 * y1 - tmp * tmp) / (8 * denum);
                    Pitch_Frame_addPitch (pitchFrame, f, strength, maxnCandidates);
                }
            }

            /*
                Check whether f0 corresponds to an actual periodicity T = 1 / f0:
                correlate two signal periods of duration T, one starting at the middle of
                the interval and one starting T seconds before. With periodicity the
                correlation coefficient should be high. However, some sounds show no
                regularity, or only a very low frequency and regularity, and nevertheless
                have a definite pitch, e.g. Shepard sounds.
            */
            Pitch_Frame_getPitch (pitchFrame, &f0, &pitch_strength);
            if (f0 > 0) {
                cc[iframe] = Sound_correlateParts (sound.peek(), tmid - 1.0 / f0, tmid, 1.0 / f0);
            }
        }

        // Base V/UV decision on correlation coefficients.
        // Resize the pitch strengths w.r.t. the cc.
        double vuvCriterium = 0.52;
        for (long iframe = 1; iframe <= numberOfFrames; iframe++) {
            Pitch_Frame_resizeStrengths (& thy frame[iframe], cc[iframe], vuvCriterium);
        }
        return thee.transfer();
    } catch (MelderError) {
        Melder_throw (me, U": no Pitch (shs) created.");
    }
}
/*
 * Build a SPINET from a Sound (exception-based version).
 *
 * Steps:
 *   1. derive centre frequencies and bandwidths of a gammatone filter bank
 *      from the ERB axis of the result (thy y1, thy dy);
 *   2. per filter: convolve the sound with a gammatone, then store the
 *      windowed frame power, scaled by bb / gammaMaxAmplitude, in thy y;
 *   3. accumulate the excitatory and inhibitory area sums per filter;
 *   4. per frame and filter: combine thy y over all filters with the
 *      normalised excitation-minus-inhibition weights and store the
 *      non-negative part in thy s.
 *
 * timeStep is raised to my dx and maximumFrequencyHz is capped at half the
 * sampling frequency. Any error is re-thrown with ": no SPINET created."
 * appended. The result is handed out of its auto-wrapper through transfer().
 */
SPINET Sound_to_SPINET (Sound me, double timeStep, double windowDuration,
    double minimumFrequencyHz, double maximumFrequencyHz, long nFilters,
    double excitationErbProportion, double inhibitionErbProportion)
{
    try {
        double firstTime;
        double b = 1.02;
        double samplingFrequency = 1 / my dx;

        if (timeStep < my dx) {
            timeStep = my dx;
        }
        if (maximumFrequencyHz > samplingFrequency / 2) {
            maximumFrequencyHz = samplingFrequency / 2;
        }

        long numberOfFrames;
        Sampled_shortTermAnalysis (me, windowDuration, timeStep, &numberOfFrames, &firstTime);
        autoSPINET thee = SPINET_create (my xmin, my xmax, numberOfFrames, timeStep, firstTime,
            minimumFrequencyHz, maximumFrequencyHz, nFilters,
            excitationErbProportion, inhibitionErbProportion);
        autoSound gaussWindow = Sound_createGaussian (windowDuration, samplingFrequency);
        autoSound segment = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoNUMvector<double> centreFreq (1, nFilters);
        autoNUMvector<double> bandwidth (1, nFilters);
        autoNUMvector<double> areaEx (1, nFilters);
        autoNUMvector<double> areaIn (1, nFilters);

        // Cochlear filterbank: gammatone
        for (long ifilter = 1; ifilter <= nFilters; ifilter++) {
            centreFreq[ifilter] = NUMerbToHertz (thy y1 + (ifilter - 1) * thy dy);
            bandwidth[ifilter] = 2 * NUMpi * b *
                (centreFreq[ifilter] * (6.23e-6 * centreFreq[ifilter] + 93.39e-3) + 28.52);
        }

        autoMelderProgress progress (L"SPINET analysis");

        for (long ifilter = 1; ifilter <= nFilters; ifilter++) {
            // outer & middle ear and phase locking
            double bb = (centreFreq[ifilter] / 1000) * exp (- centreFreq[ifilter] / 1000);
            // time where the gamma-function envelope has its maximum
            double tgammaMax = (thy gamma - 1) / bandwidth[ifilter];
            // amplitude at tgammaMax
            double gammaMaxAmplitude = pow ( (thy gamma - 1) / (NUMe * bandwidth[ifilter]), (thy gamma - 1));
            double timeCorrection = tgammaMax - windowDuration / 2;

            autoSound gammaTone = Sound_createGammaTone (0, 0.1, samplingFrequency, thy gamma, b,
                centreFreq[ifilter], 0, 0, 0);
            autoSound filtered = Sounds_convolve (me, gammaTone.peek(), kSounds_convolve_scaling_SUM,
                kSounds_convolve_signalOutsideTimeDomain_ZERO);

            // To energy measure: weigh with broad-band transfer function
            for (long iframe = 1; iframe <= numberOfFrames; iframe++) {
                Sound_into_Sound (filtered.peek(), segment.peek(),
                    Sampled_indexToX (thee.peek(), iframe) + timeCorrection);
                Sounds_multiply (segment.peek(), gaussWindow.peek());
                thy y[ifilter][iframe] = Sound_power (segment.peek()) * bb / gammaMaxAmplitude;
            }

            Melder_progress ( (double) ifilter / nFilters, L"SPINET: filter ", Melder_integer (ifilter),
                L" from ", Melder_integer (nFilters), L".");
        }

        // Excitatory and inhibitory area functions
        for (long ifilter = 1; ifilter <= nFilters; ifilter++) {
            for (long kfilter = 1; kfilter <= nFilters; kfilter++) {
                double fr = (centreFreq[kfilter] - centreFreq[ifilter]) / bandwidth[ifilter];
                areaEx[ifilter] += fgamma (fr / thy excitationErbProportion, thy gamma);
                areaIn[ifilter] += fgamma (fr / thy inhibitionErbProportion, thy gamma);
            }
        }

        // On-center off-surround interactions
        for (long iframe = 1; iframe <= numberOfFrames; iframe++) {
            for (long ifilter = 1; ifilter <= nFilters; ifilter++) {
                double a = 0;
                for (long kfilter = 1; kfilter <= nFilters; kfilter++) {
                    double fr = (centreFreq[kfilter] - centreFreq[ifilter]) / bandwidth[ifilter];
                    double hexsq = fgamma (fr / thy excitationErbProportion, thy gamma);
                    double hinsq = fgamma (fr / thy inhibitionErbProportion, thy gamma);
                    a += thy y[kfilter][iframe] * (hexsq / areaEx[ifilter] - hinsq / areaIn[ifilter]);
                }
                // Only the positive part of the interaction is kept.
                if (a > 0) {
                    thy s[ifilter][iframe] = a;
                } else {
                    thy s[ifilter][iframe] = 0;
                }
            }
        }
        return thee.transfer();
    } catch (MelderError) {
        Melder_throw (me, ": no SPINET created.");
    }
}