autoComplexSpectrogram Sound_to_ComplexSpectrogram (Sound me, double windowLength, double timeStep) { try { double samplingFrequency = 1.0 / my dx, myDuration = my xmax - my xmin, t1; if (windowLength > myDuration) { Melder_throw (U"Your sound is too short:\nit should be at least as long as one window length."); } long nsamp_window = (long) floor (windowLength / my dx); long halfnsamp_window = nsamp_window / 2 - 1; nsamp_window = halfnsamp_window * 2; if (nsamp_window < 2) { Melder_throw (U"Your analysis window is too short: less than two samples."); } long numberOfFrames; Sampled_shortTermAnalysis (me, windowLength, timeStep, &numberOfFrames, &t1); // Compute sampling of the spectrum long numberOfFrequencies = halfnsamp_window + 1; double df = samplingFrequency / (numberOfFrequencies - 1); autoComplexSpectrogram thee = ComplexSpectrogram_create (my xmin, my xmax, numberOfFrames, timeStep, t1, 0.0, 0.5 * samplingFrequency, numberOfFrequencies, df, 0.0); // autoSound analysisWindow = Sound_create (1, 0.0, nsamp_window * my dx, nsamp_window, my dx, 0.5 * my dx); for (long iframe = 1; iframe <= numberOfFrames; iframe++) { double t = Sampled_indexToX (thee.get(), iframe); long leftSample = Sampled_xToLowIndex (me, t), rightSample = leftSample + 1; long startSample = rightSample - halfnsamp_window; long endSample = leftSample + halfnsamp_window; Melder_assert (startSample >= 1); Melder_assert (endSample <= my nx); for (long j = 1; j <= nsamp_window; j++) { analysisWindow -> z[1][j] = my z[1][startSample - 1 + j]; } // window ? autoSpectrum spec = Sound_to_Spectrum (analysisWindow.get(), 0); thy z[1][iframe] = spec -> z[1][1] * spec -> z[1][1]; thy phase[1][iframe] = 0.0; for (long ifreq = 2; ifreq <= numberOfFrequencies - 1; ifreq++) { double x = spec -> z[1][ifreq], y = spec -> z[2][ifreq]; thy z[ifreq][iframe] = x * x + y * y; // power thy phase[ifreq][iframe] = atan2 (y, x); // phase [-pi,+pi] } // even number of samples thy z[numberOfFrequencies][iframe] = spec -> z[1][numberOfFrequencies] * spec -> z[1][numberOfFrequencies]; thy phase[numberOfFrequencies][iframe] = 0.0; } return thee; } catch (MelderError) { Melder_throw (me, U": no ComplexSpectrogram created."); } }
static LPC _Sound_to_LPC (Sound me, int predictionOrder, double analysisWidth, double dt, double preEmphasisFrequency, int method, double tol1, double tol2) { double t1, samplingFrequency = 1.0 / my dx; double windowDuration = 2 * analysisWidth; /* gaussian window */ long nFrames, frameErrorCount = 0; if (floor (windowDuration / my dx) < predictionOrder + 1) Melder_throw ("Analysis window duration too short.\n" "For a prediction order of ", predictionOrder, " the analysis window duration has to be greater than ", my dx * (predictionOrder + 1), "Please increase the analysis window duration or lower the prediction order."); // Convenience: analyse the whole sound into one LPC_frame if (windowDuration > my dx * my nx) { windowDuration = my dx * my nx; } Sampled_shortTermAnalysis (me, windowDuration, dt, & nFrames, & t1); autoSound sound = Data_copy (me); autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency); autoSound window = Sound_createGaussian (windowDuration, samplingFrequency); autoLPC thee = LPC_create (my xmin, my xmax, nFrames, dt, t1, predictionOrder, my dx); autoMelderProgress progress (L"LPC analysis"); if (preEmphasisFrequency < samplingFrequency / 2) { Sound_preEmphasis (sound.peek(), preEmphasisFrequency); } for (long i = 1; i <= nFrames; i++) { LPC_Frame lpcframe = (LPC_Frame) & thy d_frames[i]; double t = Sampled_indexToX (thee.peek(), i); LPC_Frame_init (lpcframe, predictionOrder); Sound_into_Sound (sound.peek(), sframe.peek(), t - windowDuration / 2); Vector_subtractMean (sframe.peek()); Sounds_multiply (sframe.peek(), window.peek()); if (method == LPC_METHOD_AUTO) { if (! Sound_into_LPC_Frame_auto (sframe.peek(), lpcframe)) { frameErrorCount++; } } else if (method == LPC_METHOD_COVAR) { if (! Sound_into_LPC_Frame_covar (sframe.peek(), lpcframe)) { frameErrorCount++; } } else if (method == LPC_METHOD_BURG) { if (! Sound_into_LPC_Frame_burg (sframe.peek(), lpcframe)) { frameErrorCount++; } } else if (method == LPC_METHOD_MARPLE) { if (! Sound_into_LPC_Frame_marple (sframe.peek(), lpcframe, tol1, tol2)) { frameErrorCount++; } } if ( (i % 10) == 1) { Melder_progress ( (double) i / nFrames, L"LPC analysis of frame ", Melder_integer (i), L" out of ", Melder_integer (nFrames), L"."); } } return thee.transfer(); }
autoMelSpectrogram Sound_to_MelSpectrogram (Sound me, double analysisWidth, double dt, double f1_mel, double fmax_mel, double df_mel) { try { double t1, samplingFrequency = 1.0 / my dx, nyquist = 0.5 * samplingFrequency; double windowDuration = 2.0 * analysisWidth; // gaussian window double fmin_mel = 0.0; double fbottom = NUMhertzToMel2 (100.0), fceiling = NUMhertzToMel2 (nyquist); long numberOfFrames; // Check defaults. if (fmax_mel <= 0.0 || fmax_mel > fceiling) { fmax_mel = fceiling; } if (fmax_mel <= f1_mel) { f1_mel = fbottom; fmax_mel = fceiling; } if (f1_mel <= 0.0) { f1_mel = fbottom; } if (df_mel <= 0.0) { df_mel = 100.0; } // Determine the number of filters. long numberOfFilters = lround ((fmax_mel - f1_mel) / df_mel); fmax_mel = f1_mel + numberOfFilters * df_mel; Sampled_shortTermAnalysis (me, windowDuration, dt, &numberOfFrames, &t1); autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency); autoSound window = Sound_createGaussian (windowDuration, samplingFrequency); autoMelSpectrogram thee = MelSpectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1, fmin_mel, fmax_mel, numberOfFilters, df_mel, f1_mel); autoMelderProgress progress (U"MelSpectrograms analysis"); for (long iframe = 1; iframe <= numberOfFrames; iframe++) { double t = Sampled_indexToX (thee.get(), iframe); Sound_into_Sound (me, sframe.get(), t - windowDuration / 2.0); Sounds_multiply (sframe.get(), window.get()); Sound_into_MelSpectrogram_frame (sframe.get(), thee.get(), iframe); if (iframe % 10 == 1) { Melder_progress ((double) iframe / numberOfFrames, U"Frame ", iframe, U" out of ", numberOfFrames, U"."); } } _Spectrogram_windowCorrection ((Spectrogram) thee.get(), window -> nx); return thee; } catch (MelderError) { Melder_throw (me, U": no MelSpectrogram created."); } }
autoBarkSpectrogram Sound_to_BarkSpectrogram (Sound me, double analysisWidth, double dt, double f1_bark, double fmax_bark, double df_bark) { try { double nyquist = 0.5 / my dx, samplingFrequency = 2 * nyquist; double windowDuration = 2 * analysisWidth; /* gaussian window */ double zmax = NUMhertzToBark2 (nyquist); double fmin_bark = 0; // Check defaults. if (f1_bark <= 0) { f1_bark = 1; } if (fmax_bark <= 0) { fmax_bark = zmax; } if (df_bark <= 0) { df_bark = 1; } fmax_bark = MIN (fmax_bark, zmax); long numberOfFilters = lround ( (fmax_bark - f1_bark) / df_bark); if (numberOfFilters <= 0) { Melder_throw (U"The combination of filter parameters is not valid."); } long numberOfFrames; double t1; Sampled_shortTermAnalysis (me, windowDuration, dt, & numberOfFrames, & t1); autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency); autoSound window = Sound_createGaussian (windowDuration, samplingFrequency); autoBarkSpectrogram thee = BarkSpectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1, fmin_bark, fmax_bark, numberOfFilters, df_bark, f1_bark); autoMelderProgress progess (U"BarkSpectrogram analysis"); for (long iframe = 1; iframe <= numberOfFrames; iframe++) { double t = Sampled_indexToX (thee.get(), iframe); Sound_into_Sound (me, sframe.get(), t - windowDuration / 2.0); Sounds_multiply (sframe.get(), window.get()); Sound_into_BarkSpectrogram_frame (sframe.get(), thee.get(), iframe); if (iframe % 10 == 1) { Melder_progress ( (double) iframe / numberOfFrames, U"BarkSpectrogram analysis: frame ", iframe, U" from ", numberOfFrames, U"."); } } _Spectrogram_windowCorrection ((Spectrogram) thee.get(), window -> nx); return thee; } catch (MelderError) { Melder_throw (me, U": no BarkSpectrogram created."); } }
PowerCepstrogram Sound_to_PowerCepstrogram (Sound me, double pitchFloor, double dt, double maximumFrequency, double preEmphasisFrequency) { try { // minimum analysis window has 3 periods of lowest pitch double analysisWidth = 3 / pitchFloor; double windowDuration = 2 * analysisWidth; /* gaussian window */ long nFrames; // Convenience: analyse the whole sound into one Cepstrogram_frame if (windowDuration > my dx * my nx) { windowDuration = my dx * my nx; } double t1, samplingFrequency = 2 * maximumFrequency; autoSound sound = Sound_resample (me, samplingFrequency, 50); Sound_preEmphasis (sound.peek(), preEmphasisFrequency); Sampled_shortTermAnalysis (me, windowDuration, dt, & nFrames, & t1); autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency); autoSound window = Sound_createGaussian (windowDuration, samplingFrequency); // find out the size of the FFT long nfft = 2; while (nfft < sframe -> nx) nfft *= 2; long nq = nfft / 2 + 1; double qmax = 0.5 * nfft / samplingFrequency, dq = qmax / (nq - 1); autoPowerCepstrogram thee = PowerCepstrogram_create (my xmin, my xmax, nFrames, dt, t1, 0, qmax, nq, dq, 0); autoMelderProgress progress (L"Cepstrogram analysis"); for (long iframe = 1; iframe <= nFrames; iframe++) { double t = Sampled_indexToX (thee.peek(), iframe); Sound_into_Sound (sound.peek(), sframe.peek(), t - windowDuration / 2); Vector_subtractMean (sframe.peek()); Sounds_multiply (sframe.peek(), window.peek()); autoSpectrum spec = Sound_to_Spectrum (sframe.peek(), 1); // FFT yes autoPowerCepstrum cepstrum = Spectrum_to_PowerCepstrum (spec.peek()); for (long i = 1; i <= nq; i++) { thy z[i][iframe] = cepstrum -> z[1][i]; } if ((iframe % 10) == 1) { Melder_progress ((double) iframe / nFrames, L"PowerCepstrogram analysis of frame ", Melder_integer (iframe), L" out of ", Melder_integer (nFrames), L"."); } } return thee.transfer(); } catch (MelderError) { Melder_throw (me, ": no PowerCepstrogram created."); } }
Cepstrogram Sound_to_Cepstrogram (Sound me, double analysisWidth, double dt, double maximumFrequency) { try { double windowDuration = 2 * analysisWidth; /* gaussian window */ long nFrames; // Convenience: analyse the whole sound into one Cepstrogram_frame if (windowDuration > my dx * my nx) { windowDuration = my dx * my nx; } double t1, samplingFrequency = 2 * maximumFrequency; autoSound sound = Sound_resample (me, samplingFrequency, 50); Sampled_shortTermAnalysis (me, windowDuration, dt, & nFrames, & t1); autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency); autoSound window = Sound_createGaussian (windowDuration, samplingFrequency); double qmin, qmax, dq, q1; long nq; { // laziness: find out the proper dimensions autoSpectrum spec = Sound_to_Spectrum (sframe.peek(), 1); autoCepstrum cepstrum = Spectrum_to_Cepstrum (spec.peek()); qmin = cepstrum -> xmin; qmax = cepstrum -> xmax; dq = cepstrum -> dx; q1 = cepstrum -> x1; nq = cepstrum -> nx; } autoCepstrogram thee = Cepstrogram_create (my xmin, my xmax, nFrames, dt, t1, qmin, qmax, nq, dq, q1); autoMelderProgress progress (L"Cepstrogram analysis"); for (long iframe = 1; iframe <= nFrames; iframe++) { double t = Sampled_indexToX (thee.peek(), iframe); Sound_into_Sound (sound.peek(), sframe.peek(), t - windowDuration / 2); Vector_subtractMean (sframe.peek()); Sounds_multiply (sframe.peek(), window.peek()); autoSpectrum spec = Sound_to_Spectrum (sframe.peek(), 1); autoCepstrum cepstrum = Spectrum_to_Cepstrum (spec.peek()); for (long i = 1; i <= nq; i++) { thy z[i][iframe] = cepstrum -> z[1][i]; } if ((iframe % 10) == 1) { Melder_progress ((double) iframe / nFrames, L"Cepstrogram analysis of frame ", Melder_integer (iframe), L" out of ", Melder_integer (nFrames), L"."); } } return thee.transfer(); } catch (MelderError) { Melder_throw (me, ": no Cepstrogram created."); } }
static autoIntensity Sound_to_Intensity_ (Sound me, double minimumPitch, double timeStep, int subtractMeanPressure) { try { /* * Preconditions. */ if (! NUMdefined (minimumPitch)) Melder_throw (U"(Sound-to-Intensity:) Minimum pitch undefined."); if (! NUMdefined (timeStep)) Melder_throw (U"(Sound-to-Intensity:) Time step undefined."); if (timeStep < 0.0) Melder_throw (U"(Sound-to-Intensity:) Time step should be zero or positive instead of ", timeStep, U"."); if (my dx <= 0.0) Melder_throw (U"(Sound-to-Intensity:) The Sound's time step should be positive."); if (minimumPitch <= 0.0) Melder_throw (U"(Sound-to-Intensity:) Minimum pitch should be positive."); /* * Defaults. */ if (timeStep == 0.0) timeStep = 0.8 / minimumPitch; // default: four times oversampling Hanning-wise double windowDuration = 6.4 / minimumPitch; Melder_assert (windowDuration > 0.0); double halfWindowDuration = 0.5 * windowDuration; long halfWindowSamples = (long) floor (halfWindowDuration / my dx); autoNUMvector <double> amplitude (- halfWindowSamples, halfWindowSamples); autoNUMvector <double> window (- halfWindowSamples, halfWindowSamples); for (long i = - halfWindowSamples; i <= halfWindowSamples; i ++) { double x = i * my dx / halfWindowDuration, root = 1 - x * x; window [i] = root <= 0.0 ? 0.0 : NUMbessel_i0_f ((2 * NUMpi * NUMpi + 0.5) * sqrt (root)); } long numberOfFrames; double thyFirstTime; try { Sampled_shortTermAnalysis (me, windowDuration, timeStep, & numberOfFrames, & thyFirstTime); } catch (MelderError) { Melder_throw (U"The duration of the sound in an intensity analysis should be at least 6.4 divided by the minimum pitch (", minimumPitch, U" Hz), " U"i.e. at least ", 6.4 / minimumPitch, U" s, instead of ", my xmax - my xmin, U" s."); } autoIntensity thee = Intensity_create (my xmin, my xmax, numberOfFrames, timeStep, thyFirstTime); for (long iframe = 1; iframe <= numberOfFrames; iframe ++) { double midTime = Sampled_indexToX (thee.peek(), iframe); long midSample = Sampled_xToNearestIndex (me, midTime); long leftSample = midSample - halfWindowSamples, rightSample = midSample + halfWindowSamples; double sumxw = 0.0, sumw = 0.0, intensity; if (leftSample < 1) leftSample = 1; if (rightSample > my nx) rightSample = my nx; for (long channel = 1; channel <= my ny; channel ++) { for (long i = leftSample; i <= rightSample; i ++) { amplitude [i - midSample] = my z [channel] [i]; } if (subtractMeanPressure) { double sum = 0.0; for (long i = leftSample; i <= rightSample; i ++) { sum += amplitude [i - midSample]; } double mean = sum / (rightSample - leftSample + 1); for (long i = leftSample; i <= rightSample; i ++) { amplitude [i - midSample] -= mean; } } for (long i = leftSample; i <= rightSample; i ++) { sumxw += amplitude [i - midSample] * amplitude [i - midSample] * window [i - midSample]; sumw += window [i - midSample]; } } intensity = sumxw / sumw; intensity /= 4e-10; thy z [1] [iframe] = intensity < 1e-30 ? -300 : 10 * log10 (intensity); } return thee; } catch (MelderError) { Melder_throw (me, U": intensity analysis not performed."); } }
FormantFilter Sound_and_Pitch_to_FormantFilter (Sound me, Pitch thee, double analysisWidth, double dt, double f1_hz, double fmax_hz, double df_hz, double relative_bw) { try { double t1, windowDuration = 2 * analysisWidth; /* gaussian window */ double nyquist = 0.5 / my dx, samplingFrequency = 2 * nyquist, fmin_hz = 0; long nt, f0_undefined = 0; if (my xmin > thy xmin || my xmax > thy xmax) Melder_throw ("The domain of the Sound is not included in the domain of the Pitch."); double f0_median = Pitch_getQuantile (thee, thy xmin, thy xmax, 0.5, kPitch_unit_HERTZ); if (f0_median == NUMundefined || f0_median == 0) { f0_median = 100; Melder_warning (L"Pitch values undefined. Bandwith fixed to 100 Hz. "); } if (f1_hz <= 0) { f1_hz = 100; } if (fmax_hz <= 0) { fmax_hz = nyquist; } if (df_hz <= 0) { df_hz = f0_median / 2; } if (relative_bw <= 0) { relative_bw = 1.1; } fmax_hz = MIN (fmax_hz, nyquist); long nf = floor ( (fmax_hz - f1_hz) / df_hz + 0.5); Sampled_shortTermAnalysis (me, windowDuration, dt, &nt, &t1); autoFormantFilter him = FormantFilter_create (my xmin, my xmax, nt, dt, t1, fmin_hz, fmax_hz, nf, df_hz, f1_hz); // Temporary objects autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency); autoSound window = Sound_createGaussian (windowDuration, samplingFrequency); autoMelderProgress progress (L"Sound & Pitch: To FormantFilter"); for (long i = 1; i <= nt; i++) { double t = Sampled_indexToX (him.peek(), i); double b, f0 = Pitch_getValueAtTime (thee, t, kPitch_unit_HERTZ, 0); if (f0 == NUMundefined || f0 == 0) { f0_undefined++; f0 = f0_median; } b = relative_bw * f0; Sound_into_Sound (me, sframe.peek(), t - windowDuration / 2); Sounds_multiply (sframe.peek(), window.peek()); Sound_into_FormantFilter_frame (sframe.peek(), him.peek(), i, b); if ( (i % 10) == 1) { Melder_progress ( (double) i / nt, L"Frame ", Melder_integer (i), L" out of ", Melder_integer (nt), L"."); } } double ref = FilterBank_DBREF * gaussian_window_squared_correction (window -> nx); NUMdmatrix_to_dBs (his z, 1, his ny, 1, his nx, ref, FilterBank_DBFAC, FilterBank_DBFLOOR); return him.transfer(); } catch (MelderError) { Melder_throw ("FormantFilter not created from Pitch & FormantFilter."); } }
MelFilter Sound_to_MelFilter (Sound me, double analysisWidth, double dt, double f1_mel, double fmax_mel, double df_mel) { try { double t1, samplingFrequency = 1 / my dx, nyquist = 0.5 * samplingFrequency; double windowDuration = 2 * analysisWidth; /* gaussian window */ double fmin_mel = 0; double fbottom = HZTOMEL (100.0), fceiling = HZTOMEL (nyquist); long nt, frameErrorCount = 0; // Check defaults. if (fmax_mel <= 0 || fmax_mel > fceiling) { fmax_mel = fceiling; } if (fmax_mel <= f1_mel) { f1_mel = fbottom; fmax_mel = fceiling; } if (f1_mel <= 0) { f1_mel = fbottom; } if (df_mel <= 0) { df_mel = 100.0; } // Determine the number of filters. long nf = floor ( (fmax_mel - f1_mel) / df_mel + 0.5); fmax_mel = f1_mel + nf * df_mel; Sampled_shortTermAnalysis (me, windowDuration, dt, &nt, &t1); autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency); autoSound window = Sound_createGaussian (windowDuration, samplingFrequency); autoMelFilter thee = MelFilter_create (my xmin, my xmax, nt, dt, t1, fmin_mel, fmax_mel, nf, df_mel, f1_mel); autoMelderProgress progress (L"MelFilters analysis"); for (long i = 1; i <= nt; i++) { double t = Sampled_indexToX (thee.peek(), i); Sound_into_Sound (me, sframe.peek(), t - windowDuration / 2); Sounds_multiply (sframe.peek(), window.peek()); if (! Sound_into_MelFilter_frame (sframe.peek(), thee.peek(), i)) { frameErrorCount++; } if ( (i % 10) == 1) { Melder_progress ( (double) i / nt, L"Frame ", Melder_integer (i), L" out of ", Melder_integer (nt), L"."); } } if (frameErrorCount) Melder_warning (L"Analysis results of ", Melder_integer (frameErrorCount), L" frame(s) out of ", Melder_integer (nt), L" will be suspect."); // Window correction. double ref = FilterBank_DBREF * gaussian_window_squared_correction (window -> nx); NUMdmatrix_to_dBs (thy z, 1, thy ny, 1, thy nx, ref, FilterBank_DBFAC, FilterBank_DBFLOOR); return thee.transfer(); } catch (MelderError) { Melder_throw (me, ": no MelFilter created."); } }
Pitch Sound_to_Pitch_any (Sound me, double dt, /*timeStepStradygy related*/ double minimumPitch, /*Pitch settings realted*/ double periodsPerWindow, /*kTimeSoundAnalysisEditor_pitch_analysisMethod related*/ int maxnCandidates, int method, /*method related*/ double silenceThreshold, double voicingThreshold, double octaveCost, double octaveJumpCost, double voicedUnvoicedCost, double ceiling) { NUMfft_Table fftTable = NUMfft_Table_create(); double duration, t1; double dt_window; /* Window length in seconds. */ long nsamp_window, halfnsamp_window; /* Number of samples per window. */ long nFrames, minimumLag, maximumLag; long iframe, nsampFFT; double interpolation_depth; long nsamp_period, halfnsamp_period; /* Number of samples in longest period. */ long brent_ixmax, brent_depth; double brent_accuracy; /* Obsolete. */ double globalPeak; if (maxnCandidates < 2 || method < AC_HANNING && method > FCC_ACCURATE) { std::cout<<"Error: maxnCandidates: "<<maxnCandidates<<" method: "<<method<<"."<<std::endl; std::cout<<"Sound_to_Pitch.cpp: Line 13. 69"<<std::endl; return NULL; } if (maxnCandidates < ceiling / minimumPitch) maxnCandidates = ceiling / minimumPitch; if (dt <= 0.0) dt = periodsPerWindow / minimumPitch / 4.0; /* e.g. 3 periods, 75 Hz: 10 milliseconds. */ switch (method) { case AC_HANNING: brent_depth = NUM_PEAK_INTERPOLATE_SINC70; brent_accuracy = 1e-7; interpolation_depth = 0.5; break; case AC_GAUSS: periodsPerWindow *= 2; /* Because Gaussian window is twice as long. */ brent_depth = NUM_PEAK_INTERPOLATE_SINC700; brent_accuracy = 1e-11; interpolation_depth = 0.25; /* Because Gaussian window is twice as long. */ break; case FCC_NORMAL: brent_depth = NUM_PEAK_INTERPOLATE_SINC70; brent_accuracy = 1e-7; interpolation_depth = 1.0; break; case FCC_ACCURATE: brent_depth = NUM_PEAK_INTERPOLATE_SINC700; brent_accuracy = 1e-11; interpolation_depth = 1.0; break; } duration = my dx * my nx; if (minimumPitch < periodsPerWindow / duration) { std::cout<<"To analyse this Sound, minimum pitch must not be less than "<< periodsPerWindow / duration<<" Hz."<<std::endl; std::cout<<"Sound_to_Pitch.cpp: Line 31.103"<<std::endl; return NULL; } /* * Determine the number of samples in the longest period. * We need this to compute the local mean of the sound (looking one period in both directions), * and to compute the local peak of the sound (looking half a period in both directions). */ nsamp_period = floor(1 / my dx / minimumPitch); halfnsamp_period = nsamp_period / 2 + 1; if (ceiling > 0.5 / my dx) ceiling = 0.5 / my dx; // Determine window length in seconds and in samples. dt_window = periodsPerWindow / minimumPitch; nsamp_window = floor (dt_window / my dx); halfnsamp_window = nsamp_window / 2 - 1; if (halfnsamp_window < 2){ std::cout<<"Analysis window too short."<<std::endl; std::cout<<"Sound_to_Pitch.cpp: Line 31.123"<<std::endl; return NULL; } nsamp_window = halfnsamp_window * 2; // Determine the minimum and maximum lags. minimumLag = floor (1 / my dx / ceiling); if (minimumLag < 2) minimumLag = 2; maximumLag = floor (nsamp_window / periodsPerWindow) + 2; if (maximumLag > nsamp_window) maximumLag = nsamp_window; /* * Determine the number of frames. * Fit as many frames as possible symmetrically in the total duration. * We do this even for the forward cross-correlation method, * because that allows us to compare the two methods. */ if(!Sampled_shortTermAnalysis (me, method >= FCC_NORMAL ? 1 / minimumPitch + dt_window : dt_window, dt, & nFrames, & t1)){ std::cout<<"The pitch analysis would give zero pitch frames."<<std::endl; std::cout<<"Sound_to_Pitch.cpp: Line 31.142"<<std::endl; return NULL; } // Create the resulting pitch contour. Pitch thee = Pitch_create (my xmin, my xmax, nFrames, dt, t1, ceiling, maxnCandidates); // Compute the global absolute peak for determination of silence threshold. globalPeak = 0.0; for (long channel = 1; channel <= my ny; channel ++) { double mean = 0.0; for (long i = 1; i <= my nx; i ++) { mean += my z [channel] [i]; } mean /= my nx; for (long i = 1; i <= my nx; i ++) { double value = fabs (my z [channel] [i] - mean); if (value > globalPeak) globalPeak = value; } } if (globalPeak == 0.0) return thee; double **frame, *ac, *window, *windowR; if (method >= FCC_NORMAL) { /* For cross-correlation analysis. */ // Create buffer for cross-correlation analysis. frame = (double **)malloc(sizeof(double *) * (my ny + 1)); for(long i = 1; i <= my ny; ++ i){ frame[i] = (double *)malloc(sizeof(double) * (nsamp_window + 1)); for(long j = 1; j <= nsamp_window; ++ j) frame[i][j] = 0.0; } /****frame.reset (1, my ny, 1, nsamp_window);****/ brent_ixmax = nsamp_window * interpolation_depth; } else { /* For autocorrelation analysis. */ /* * Compute the number of samples needed for doing FFT. * To avoid edge effects, we have to append zeroes to the window. * The maximum lag considered for maxima is maximumLag. * The maximum lag used in interpolation is nsamp_window * interpolation_depth. */ nsampFFT = 1; while (nsampFFT < nsamp_window * (1 + interpolation_depth)) nsampFFT *= 2; // Create buffers for autocorrelation analysis. frame = (double **)malloc(sizeof(double *) * (my ny + 1)); for(long i = 1; i <= my ny; ++ i){ frame [i] = (double *)malloc(sizeof(double) * (nsampFFT + 1)); for(long j = 0; j <= nsampFFT; ++ j) frame[i][j] = 0.0; } /****frame.reset (1, my ny, 1, nsampFFT);****/ window = (double *)malloc(sizeof(double) * (nsamp_window + 1)); for(long i = 0; i <= nsamp_window; ++ i) window[i] = 0.0; /****window.reset (1, nsamp_window);****/ windowR = (double *)malloc(sizeof(double) * (nsampFFT + 1)); ac = (double *)malloc(sizeof(double) * (nsampFFT + 1)); for(long i = 0; i <= nsampFFT; ++ i) windowR[i] = ac[i] = 0.0; /****windowR.reset (1, nsampFFT); ac.reset (1, nsampFFT); ****/ NUMfft_Table_init (fftTable, nsampFFT); /* * A Gaussian or Hanning window is applied against phase effects. * The Hanning window is 2 to 5 dB better for 3 periods/window. * The Gaussian window is 25 to 29 dB better for 6 periods/window. */ if (method == AC_GAUSS) { /* Gaussian window. */ double imid = 0.5 * (nsamp_window + 1), edge = exp (-12.0); for (long i = 1; i <= nsamp_window; i ++) window[i] = (exp(-48.0*(i-imid)*(i-imid) / (nsamp_window + 1) / (nsamp_window + 1)) - edge) / (1 - edge); } else { /* Hanning window*/ for (long i = 1; i <= nsamp_window; i ++) window [i] = 0.5 - 0.5 * cos (i * 2 * NUMpi / (nsamp_window + 1)); } // Compute the normalized autocorrelation of the window. for (long i = 1; i <= nsamp_window; i ++) windowR [i] = window [i]; NUMfft_forward (fftTable, windowR); windowR [1] *= windowR [1]; // DC component for (long i = 2; i < nsampFFT; i += 2) { windowR [i] = windowR [i] * windowR [i] + windowR [i+1] * windowR [i+1]; windowR [i + 1] = 0.0; // power spectrum: square and zero } windowR [nsampFFT] *= windowR [nsampFFT]; // Nyquist frequency NUMfft_backward (fftTable, windowR); // autocorrelation for (long i = 2; i <= nsamp_window; i ++) windowR [i] /= windowR [1]; // normalize windowR [1] = 1.0; // normalize brent_ixmax = nsamp_window * interpolation_depth; } double *r = (double *) malloc( sizeof(double) * (2 * (nsamp_window + 1) + 1) ); r += nsamp_window + 1; //make "r" become a symetrical vectr long *imax = (long *) malloc( sizeof(long) * (maxnCandidates + 1)); double *localMean = (double *) malloc( sizeof(double) * (my ny + 1)); for(iframe = 1; iframe <= nFrames; iframe ++){ Pitch_Frame pitchFrame = & thy frame[iframe]; double t = thy x1 + (iframe - 1) *(thy dx), localPeak; long leftSample = (long) floor((t - my x1) / my dx) + 1; long rightSample = leftSample + 1; long startSample, endSample; for(long channel = 1; channel <= my ny; ++ channel){ //Compute the local mean; look one longest period to both sides. startSample = rightSample - nsamp_period; endSample = leftSample + nsamp_period; if ( startSample < 0 ) { std::cout<<"StartSample < 1"<<std::endl; std::cout<<"Sound_to_Pitch.cpp: Line 31"<<std::endl; return NULL; } if (endSample > my nx){ std::cout<<"EndSample > my nx"<<std::endl; std::cout<<"Sound_to_Pitch.cpp: Line 31.262"<<std::endl; return NULL; } localMean[channel] = 0.0; for (long i = startSample; i <= endSample; i ++) { localMean[channel] += my z[channel][i]; } localMean[channel] /= 2 * nsamp_period; // Copy a window to a frame and subtract the local mean. We are going to kill the DC component before windowing. startSample = rightSample - halfnsamp_window; endSample = leftSample + halfnsamp_window; if ( startSample < 1 ) { std::cout<<"StartSample < 1"<<std::endl; std::cout<<"Sound_to_Pitch.cpp: Line 31.281"<<std::endl; return NULL; } if (endSample > my nx){ std::cout<<"EndSample > my nx"<<std::endl; std::cout<<"Sound_to_Pitch.cpp: Line 31.287"<<std::endl; return NULL; } if (method < FCC_NORMAL) { for (long j = 1, i = startSample; j <= nsamp_window; j ++) frame [channel] [j] = (my z [channel] [i ++] - localMean [channel]) * window [j]; for (long j = nsamp_window + 1; j <= nsampFFT; j ++) frame [channel] [j] = 0.0; } else { for (long j = 1, i = startSample; j <= nsamp_window; j ++) frame [channel] [j] = my z [channel] [i ++] - localMean [channel]; } } // Compute the local peak; look half a longest period to both sides. localPeak = 0.0; if ((startSample = halfnsamp_window + 1 - halfnsamp_period) < 1) startSample = 1; if ((endSample = halfnsamp_window + halfnsamp_period) > nsamp_window) endSample = nsamp_window; for (long channel = 1; channel <= my ny; channel ++) { for (long j = startSample; j <= endSample; j ++) { double value = fabs (frame [channel] [j]); if (value > localPeak) localPeak = value; } } pitchFrame->intensity = localPeak > globalPeak ? 1.0 : localPeak / globalPeak; // Compute the correlation into the array 'r'. if (method >= FCC_NORMAL) { double startTime = t - 0.5 * (1.0 / minimumPitch + dt_window); long localSpan = maximumLag + nsamp_window, localMaximumLag, offset; if ((startSample = (long) floor ((startTime - my x1) / my dx)) + 1 < 1) startSample = 1; if (localSpan > my nx + 1 - startSample) localSpan = my nx + 1 - startSample; localMaximumLag = localSpan - nsamp_window; offset = startSample - 1; double sumx2 = 0; /* Sum of squares. */ for (long channel = 1; channel <= my ny; channel ++) { ///channel = 1; channel <= my ny double *amp = my z[channel] + offset; for (long i = 1; i <= nsamp_window; i ++) { ///i = 1; i <= nsamp_window double x = amp[i] - localMean[channel]; sumx2 += x * x; } } double sumy2 = sumx2; /* At zero lag, these are still equal. */ r[0] = 1.0; for (long i = 1; i <= localMaximumLag; i ++) { double product = 0.0; for (long channel = 1; channel <= my ny; channel ++) { ///channel = 1; channel <= my ny double *amp = my z[channel] + offset; double y0 = amp[i] - localMean[channel]; double yZ = amp[i + nsamp_window] - localMean[channel]; sumy2 += yZ * yZ - y0 * y0; for (long j = 1; j <= nsamp_window; j ++) { ///j = 1; j <= nsamp_window double x = amp[j] - localMean[channel]; double y = amp[i + j] - localMean[channel]; product += x * y; } } r[- i] = r[i] = product / sqrt (sumx2 * sumy2); } } else { // The FFT of the autocorrelation is the power spectrum. for (long i = 1; i <= nsampFFT; i ++) ac [i] = 0.0; for (long channel = 1; channel <= my ny; channel ++) { NUMfft_forward (fftTable, frame [channel]); /* Complex spectrum. */ ac [1] += frame [channel] [1] * frame [channel] [1]; /* DC component. */ for (long i = 2; i < nsampFFT; i += 2) { ac [i] += frame [channel] [i] * frame [channel] [i] + frame [channel] [i+1] * frame [channel] [i+1]; /* Power spectrum. */ } ac [nsampFFT] += frame [channel] [nsampFFT] * frame [channel] [nsampFFT]; /* Nyquist frequency. */ } NUMfft_backward (fftTable, ac); /* Autocorrelation. */ /* * Normalize the autocorrelation to the value with zero lag, * and divide it by the normalized autocorrelation of the window. */ r [0] = 1.0; for (long i = 1; i <= brent_ixmax; i ++) r [- i] = r [i] = ac [i + 1] / (ac [1] * windowR [i + 1]); } // Create (too much) space for candidates Pitch_Frame_init (pitchFrame, maxnCandidates); // Register the first candidate, which is always present: voicelessness. pitchFrame->nCandidates = 1; pitchFrame->candidate[1].frequency = 0.0; /* Voiceless: always present. */ pitchFrame->candidate[1].strength = 0.0; /* * Shortcut: absolute silence is always voiceless. * Go to next frame. */ if (localPeak == 0) continue; /* * Find the strongest maxima of the correlation of this frame, * and register them as candidates. */ imax[1] = 0; for (long i = 2; i < maximumLag && i < brent_ixmax; i ++) if (r[i] > 0.5 * voicingThreshold && /* Not too unvoiced? */ r[i] > r[i-1] && r[i] >= r[i+1]) /* Maximum ? */ { int place = 0; // Use parabolic interpolation for first estimate of frequency,and sin(x)/x interpolation to compute the strength of this frequency. double dr = 0.5 * (r[i+1] - r[i-1]); double d2r = 2 * r[i] - r[i-1] - r[i+1]; double frequencyOfMaximum = 1 / my dx / (i + dr / d2r); long offset = - brent_ixmax - 1; double strengthOfMaximum = /* method & 1 ? */ NUM_interpolate_sinc (& r[offset], brent_ixmax - offset, 1 / my dx / frequencyOfMaximum - offset, 30) /* : r [i] + 0.5 * dr * dr / d2r */; /* High values due to short windows are to be reflected around 1. */ if (strengthOfMaximum > 1.0) strengthOfMaximum = 1.0 / strengthOfMaximum; // Find a place for this maximum. if (pitchFrame->nCandidates < thy maxnCandidates) { /* Is there still a free place? */ place = ++ pitchFrame->nCandidates; } else { /* Try the place of the weakest candidate so far. */ double weakest = 2; for (int iweak = 2; iweak <= thy maxnCandidates; iweak ++) { //iweak = 2; iweak <= thy maxnCandidates; /* High frequencies are to be favoured */ /* if we want to analyze a perfectly periodic signal correctly. */ double localStrength = pitchFrame->candidate[iweak].strength - octaveCost * NUMlog2 (minimumPitch / pitchFrame->candidate[iweak].frequency); if (localStrength < weakest) { weakest = localStrength; place = iweak; } } /* If this maximum is weaker than the weakest candidate so far, give it no place. */ if (strengthOfMaximum - octaveCost * NUMlog2 (minimumPitch / frequencyOfMaximum) <= weakest) place = 0; } if (place) { /* Have we found a place for this candidate? */ pitchFrame->candidate[place].frequency = frequencyOfMaximum; pitchFrame->candidate[place].strength = strengthOfMaximum; imax [place] = i; } } // Second pass: for extra precision, maximize sin(x)/x interpolation ('sinc'). for (long i = 2; i <= pitchFrame->nCandidates; i ++) { if (method != AC_HANNING || pitchFrame->candidate[i].frequency > 0.0 / my dx) { double xmid, ymid; long offset = - brent_ixmax - 1; ymid = NUMimproveMaximum (& r[offset], brent_ixmax - offset, imax[i] - offset, pitchFrame->candidate[i].frequency > 0.3 / my dx ? NUM_PEAK_INTERPOLATE_SINC700 : brent_depth, & xmid); xmid += offset; pitchFrame->candidate[i].frequency = 1.0 / my dx / xmid; if (ymid > 1.0) ymid = 1.0 / ymid; pitchFrame->candidate[i].strength = ymid; } } } /* Next frame. */ Pitch_pathFinder (thee, silenceThreshold, voicingThreshold,octaveCost, octaveJumpCost, voicedUnvoicedCost, ceiling, false); //false: Melder_debug == 31 ? true : false Melder_debug 31: Pitch analysis: formant pulling on return thee; }
SPINET Sound_to_SPINET (Sound me, double timeStep, double windowDuration, double minimumFrequencyHz, double maximumFrequencyHz, long nFilters, double excitationErbProportion, double inhibitionErbProportion) { Sound window = NULL, frame = NULL; SPINET thee = NULL; long i, j, k, numberOfFrames; double firstTime, b = 1.02, samplingFrequency = 1 / my dx; double *f = NULL, *bw = NULL, *aex = NULL, *ain = NULL; if (timeStep < my dx) timeStep = my dx; if (maximumFrequencyHz > samplingFrequency / 2) maximumFrequencyHz = samplingFrequency / 2; if (! Sampled_shortTermAnalysis (me, windowDuration, timeStep, &numberOfFrames, &firstTime) || ! (thee = SPINET_create (my xmin, my xmax, numberOfFrames, timeStep, firstTime, minimumFrequencyHz, maximumFrequencyHz, nFilters, excitationErbProportion, inhibitionErbProportion)) || ! (window = Sound_createGaussian (windowDuration, samplingFrequency)) || ! (frame = Sound_createSimple (1, windowDuration, samplingFrequency)) || ! (f = NUMdvector (1, nFilters)) || ! (bw = NUMdvector (1, nFilters)) || ! (aex = NUMdvector (1, nFilters)) || ! (ain = NUMdvector (1, nFilters))) goto cleanup; /* Cochlear filterbank: gammatone */ for (i=1; i <= nFilters; i++) { f[i] = NUMerbToHertz (thy y1 + (i - 1) * thy dy); bw[i] = 2 * NUMpi * b * (f[i] * (6.23e-6 * f[i] + 93.39e-3) + 28.52); } Melder_progress1 (0.0, L"SPINET analysis"); for (i=1; i <= nFilters; i++) { Sound gammaTone = NULL, filtered = NULL; /* Contribution of outer & middle ear and phase locking */ double bb = (f[i] / 1000) * exp (- f[i] / 1000); /* Time where gammafunction envelope has its maximum */ double tgammaMax = (thy gamma - 1) / bw[i]; /* Amplitude at tgammaMax */ double gammaMaxAmplitude = pow ((thy gamma - 1) / (NUMe * bw[i]), (thy gamma - 1)); double timeCorrection = tgammaMax - windowDuration / 2; if (! (gammaTone = Sound_createGammaTone (0, 0.1, samplingFrequency, thy gamma, b, f[i], 0, 0, 0)) || /* filtering can be made 30% faster by taking Spectrum(me) outside the loop */ ! (filtered = Sounds_convolve (me, gammaTone, kSounds_convolve_scaling_SUM, kSounds_convolve_signalOutsideTimeDomain_ZERO))) { forget (gammaTone); goto cleanup; } /* To energy measure: weigh with broad-band transfer function */ for (j=1; j <= numberOfFrames; j++) { Sound_into_Sound (filtered, frame, Sampled_indexToX (thee, j) + timeCorrection); Sounds_multiply (frame, window); thy y[i][j] = Sound_power (frame) * bb / gammaMaxAmplitude; } forget (filtered); forget (gammaTone); if (! Melder_progress5 ((double)i / nFilters, L"SPINET: filter ", Melder_integer (i), L" from ", Melder_integer (nFilters), L".")) goto cleanup; } /* Excitatory and inhibitory area functions */ for (i=1; i <= nFilters; i++) { for (k=1; k <= nFilters; k++) { double fr = (f[k] - f[i]) / bw[i]; aex[i] += fgamma (fr / thy excitationErbProportion, thy gamma); ain[i] += fgamma (fr / thy inhibitionErbProportion, thy gamma); } } /* On-center off-surround interactions */ for (j=1; j <= numberOfFrames; j++) for (i=1; i <= nFilters; i++) { double a = 0; for (k=1; k <= nFilters; k++) { double fr = (f[k] - f[i]) / bw[i]; double hexsq = fgamma (fr / thy excitationErbProportion, thy gamma); double hinsq = fgamma (fr / thy inhibitionErbProportion, thy gamma); a += thy y[k][j] * (hexsq / aex[i] - hinsq / ain[i]); } thy s[i][j] = a > 0 ? a : 0; } Melder_progress1 (1.0, NULL); cleanup: NUMdvector_free (aex, 1); NUMdvector_free (ain, 1); NUMdvector_free (f, 1); NUMdvector_free (bw, 1); forget (window); forget (frame); if (! Melder_hasError()) return thee; forget (thee); return Melder_errorp1 (L"Sound_to_SPINET: not performed."); }
LPC LPC_and_Sound_to_LPC_robust (LPC thee, Sound me, double analysisWidth, double preEmphasisFrequency, double k, int itermax, double tol, int wantlocation) { struct huber_struct struct_huber = { 0 }; try { double t1, samplingFrequency = 1.0 / my dx, tol_svd = 0.000001; double location = 0, windowDuration = 2 * analysisWidth; /* Gaussian window */ long nFrames, frameErrorCount = 0, iter = 0; long p = thy maxnCoefficients; if (my xmin != thy xmin || my xmax != thy xmax) { Melder_throw ("Time domains differ."); } if (my dx != thy samplingPeriod) { Melder_throw ("Sampling intervals differ."); } if (floor (windowDuration / my dx) < p + 1) { Melder_throw ("Analysis window too short."); } Sampled_shortTermAnalysis (me, windowDuration, thy dx, & nFrames, & t1); if (nFrames != thy nx || t1 != thy x1) { Melder_throw ("Incorrect retrieved analysis width"); } autoSound sound = Data_copy (me); autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency); autoSound window = Sound_createGaussian (windowDuration, samplingFrequency); autoLPC him = Data_copy (thee); huber_struct_init (&struct_huber, windowDuration, p, samplingFrequency, location, wantlocation); struct_huber.k = k; struct_huber.tol = tol; struct_huber.tol_svd = tol_svd; struct_huber.itermax = itermax; autoMelderProgress progess (L"LPC analysis"); Sound_preEmphasis (sound.peek(), preEmphasisFrequency); for (long i = 1; i <= nFrames; i++) { LPC_Frame lpc = (LPC_Frame) & thy d_frames[i]; LPC_Frame lpcto = (LPC_Frame) & his d_frames[i]; double t = Sampled_indexToX (thee, i); Sound_into_Sound (sound.peek(), sframe.peek(), t - windowDuration / 2); Vector_subtractMean (sframe.peek()); Sounds_multiply (sframe.peek(), window.peek()); try { LPC_Frames_and_Sound_huber (lpc, sframe.peek(), lpcto, & struct_huber); } catch (MelderError) { frameErrorCount++; } iter += struct_huber.iter; if ( (i % 10) == 1) { Melder_progress ( (double) i / nFrames, L"LPC analysis of frame ", Melder_integer (i), L" out of ", Melder_integer (nFrames), L"."); } } if (frameErrorCount) Melder_warning (L"Results of ", Melder_integer (frameErrorCount), L" frame(s) out of ", Melder_integer (nFrames), L" could not be optimised."); MelderInfo_writeLine4 (L"Number of iterations: ", Melder_integer (iter), L"\n Average per frame: ", Melder_double (((double) iter) / nFrames)); huber_struct_destroy (&struct_huber); return him.transfer(); } catch (MelderError) { huber_struct_destroy (&struct_huber); Melder_throw (me, ": no robust LPC created."); } }
Pitch Sound_to_Pitch_shs (Sound me, double timeStep, double minimumPitch, double maximumFrequency, double ceiling, long maxnSubharmonics, long maxnCandidates, double compressionFactor, long nPointsPerOctave) { try { double firstTime, newSamplingFrequency = 2 * maximumFrequency; double windowDuration = 2 / minimumPitch, halfWindow = windowDuration / 2; double atans = nPointsPerOctave * NUMlog2 (65.0 / 50.0) - 1; // Number of speech samples in the downsampled signal in each frame: // 100 for windowDuration == 0.04 and newSamplingFrequency == 2500 long nx = lround (windowDuration * newSamplingFrequency); // The minimum number of points for the fft is 256. long nfft = 1; while ( (nfft *= 2) < nx || nfft <= 128) { ; } long nfft2 = nfft / 2 + 1; double frameDuration = nfft / newSamplingFrequency; double df = newSamplingFrequency / nfft; // The number of points on the octave scale double fminl2 = NUMlog2 (minimumPitch), fmaxl2 = NUMlog2 (maximumFrequency); long nFrequencyPoints = (long) floor ((fmaxl2 - fminl2) * nPointsPerOctave); double dfl2 = (fmaxl2 - fminl2) / (nFrequencyPoints - 1); autoSound sound = Sound_resample (me, newSamplingFrequency, 50); long numberOfFrames; Sampled_shortTermAnalysis (sound.peek(), windowDuration, timeStep, &numberOfFrames, &firstTime); autoSound frame = Sound_createSimple (1, frameDuration, newSamplingFrequency); autoSound hamming = Sound_createHamming (nx / newSamplingFrequency, newSamplingFrequency); autoPitch thee = Pitch_create (my xmin, my xmax, numberOfFrames, timeStep, firstTime, ceiling, maxnCandidates); autoNUMvector<double> cc (1, numberOfFrames); autoNUMvector<double> specAmp (1, nfft2); autoNUMvector<double> fl2 (1, nfft2); autoNUMvector<double> yv2 (1, nfft2); autoNUMvector<double> arctg (1, nFrequencyPoints); autoNUMvector<double> al2 (1, nFrequencyPoints); Melder_assert (frame->nx >= nx); Melder_assert (hamming->nx == nx); // Compute the absolute value of the globally largest amplitude w.r.t. the global mean. double globalMean, globalPeak; Sound_localMean (sound.peek(), sound -> xmin, sound -> xmax, &globalMean); Sound_localPeak (sound.peek(), sound -> xmin, sound -> xmax, globalMean, &globalPeak); /* For the cubic spline interpolation we need the frequencies on an octave scale, i.e., a log2 scale. All frequencies must be DIFFERENT, otherwise the cubic spline interpolation will give corrupt results. Because log2(f==0) is not defined, we use the heuristic: f[2]-f[1] == f[3]-f[2]. */ for (long i = 2; i <= nfft2; i++) { fl2[i] = NUMlog2 ( (i - 1) * df); } fl2[1] = 2 * fl2[2] - fl2[3]; // Calculate frequencies regularly spaced on a log2-scale and // the frequency weighting function. for (long i = 1; i <= nFrequencyPoints; i++) { arctg[i] = 0.5 + atan (3 * (i - atans) / nPointsPerOctave) / NUMpi; } // Perform the analysis on all frames. for (long i = 1; i <= numberOfFrames; i++) { Pitch_Frame pitchFrame = &thy frame[i]; double hm = 1, f0, pitch_strength, localMean, localPeak; double tmid = Sampled_indexToX (thee.peek(), i); /* The center of this frame */ long nx_tmp = frame -> nx; // Copy a frame from the sound, apply a hamming window. Get local 'intensity' frame -> nx = nx; /*begin vies */ Sound_into_Sound (sound.peek(), frame.peek(), tmid - halfWindow); Sounds_multiply (frame.peek(), hamming.peek()); Sound_localMean (sound.peek(), tmid - 3 * halfWindow, tmid + 3 * halfWindow, &localMean); Sound_localPeak (sound.peek(), tmid - halfWindow, tmid + halfWindow, localMean, &localPeak); pitchFrame -> intensity = localPeak > globalPeak ? 1 : localPeak / globalPeak; frame -> nx = nx_tmp; /* einde vies */ // Get the Fourier spectrum. autoSpectrum spec = Sound_to_Spectrum (frame.peek(), 1); Melder_assert (spec->nx == nfft2); // From complex spectrum to amplitude spectrum. for (long j = 1; j <= nfft2; j++) { double rs = spec -> z[1][j], is = spec -> z[2][j]; specAmp[j] = sqrt (rs * rs + is * is); } // Enhance the peaks in the spectrum. spec_enhance_SHS (specAmp.peek(), nfft2); // Smooth the enhanced spectrum. spec_smoooth_SHS (specAmp.peek(), nfft2); // Go to a logarithmic scale and perform cubic spline interpolation to get // spectral values for the increased number of frequency points. NUMspline (fl2.peek(), specAmp.peek(), nfft2, 1e30, 1e30, yv2.peek()); for (long j = 1; j <= nFrequencyPoints; j++) { double f = fminl2 + (j - 1) * dfl2; NUMsplint (fl2.peek(), specAmp.peek(), yv2.peek(), nfft2, f, &al2[j]); } // Multiply by frequency selectivity of the auditory system. for (long j = 1; j <= nFrequencyPoints; j++) al2[j] = al2[j] > 0 ? al2[j] * arctg[j] : 0; // The subharmonic summation. Shift spectra in octaves and sum. Pitch_Frame_init (pitchFrame, maxnCandidates); autoNUMvector<double> sumspec (1, nFrequencyPoints); pitchFrame -> nCandidates = 0; /* !!!!! */ for (long m = 1; m <= maxnSubharmonics + 1; m++) { long kb = 1 + (long) floor (nPointsPerOctave * NUMlog2 (m)); for (long k = kb; k <= nFrequencyPoints; k++) { sumspec[k - kb + 1] += al2[k] * hm; } hm *= compressionFactor; } // First register the voiceless candidate (always present). Pitch_Frame_addPitch (pitchFrame, 0, 0, maxnCandidates); /* Get the best local estimates for the pitch as the maxima of the subharmonic sum spectrum by parabolic interpolation on three points: The formula for a parabole with a maximum is: y(x) = a - b (x - c)^2 with a, b, c >= 0 The three points are (-x, y1), (0, y2) and (x, y3). The solution for a (the maximum) and c (the position) is: a = (2 y1 (4 y2 + y3) - y1^2 - (y3 - 4 y2)^2)/( 8 (y1 - 2 y2 + y3) c = dx (y1 - y3) / (2 (y1 - 2 y2 + y3)) (b = (2 y2 - y1 - y3) / (2 dx^2) ) */ for (long k = 2; k <= nFrequencyPoints - 1; k++) { double y1 = sumspec[k - 1], y2 = sumspec[k], y3 = sumspec[k + 1]; if (y2 > y1 && y2 >= y3) { double denum = y1 - 2 * y2 + y3, tmp = y3 - 4 * y2; double x = dfl2 * (y1 - y3) / (2 * denum); double f = pow (2, fminl2 + (k - 1) * dfl2 + x); double strength = (2 * y1 * (4 * y2 + y3) - y1 * y1 - tmp * tmp) / (8 * denum); Pitch_Frame_addPitch (pitchFrame, f, strength, maxnCandidates); } } /* Check whether f0 corresponds to an actual periodicity T = 1 / f0: correlate two signal periods of duration T, one starting at the middle of the interval and one starting T seconds before. If there is periodicity the correlation coefficient should be high. However, some sounds do not show any regularity, or very low frequency and regularity, and nevertheless have a definite pitch, e.g. Shepard sounds. */ Pitch_Frame_getPitch (pitchFrame, &f0, &pitch_strength); if (f0 > 0) { cc[i] = Sound_correlateParts (sound.peek(), tmid - 1.0 / f0, tmid, 1.0 / f0); } } // Base V/UV decision on correlation coefficients. // Resize the pitch strengths w.r.t. the cc. double vuvCriterium = 0.52; for (long i = 1; i <= numberOfFrames; i++) { Pitch_Frame_resizeStrengths (& thy frame[i], cc[i], vuvCriterium); } return thee.transfer(); } catch (MelderError) { Melder_throw (me, U": no Pitch (shs) created."); } }
PowerCepstrogram Sound_to_PowerCepstrogram_hillenbrand (Sound me, double minimumPitch, double dt) { try { // minimum analysis window has 3 periods of lowest pitch double analysisWidth = 3 / minimumPitch; if (analysisWidth > my dx * my nx) { analysisWidth = my dx * my nx; } double t1, samplingFrequency = 1 / my dx; autoSound thee; if (samplingFrequency > 30000) { samplingFrequency = samplingFrequency / 2; thee.reset (Sound_resample (me, samplingFrequency, 1)); } else { thee.reset (Data_copy (me)); } // pre-emphasis with fixed coefficient 0.9 for (long i = thy nx; i > 1; i--) { thy z[1][i] -= 0.9 * thy z[1][i - 1]; } long nosInWindow = analysisWidth * samplingFrequency, nFrames; if (nosInWindow < 8) { Melder_throw ("Analysis window too short."); } Sampled_shortTermAnalysis (thee.peek(), analysisWidth, dt, & nFrames, & t1); autoNUMvector<double> hamming (1, nosInWindow); for (long i = 1; i <= nosInWindow; i++) { hamming[i] = 0.54 -0.46 * cos(2 * NUMpi * (i - 1) / (nosInWindow - 1)); } long nfft = 8; // minimum possible while (nfft < nosInWindow) { nfft *= 2; } long nfftdiv2 = nfft / 2; autoNUMvector<double> fftbuf (1, nfft); // "complex" array autoNUMvector<double> spectrum (1, nfftdiv2 + 1); // +1 needed autoNUMfft_Table fftTable; NUMfft_Table_init (&fftTable, nfft); // sound to spectrum double qmax = 0.5 * nfft / samplingFrequency, dq = qmax / (nfftdiv2 + 1); autoPowerCepstrogram him = PowerCepstrogram_create (my xmin, my xmax, nFrames, dt, t1, 0, qmax, nfftdiv2+1, dq, 0); autoMelderProgress progress (L"Cepstrogram analysis"); for (long iframe = 1; iframe <= nFrames; iframe++) { double tbegin = t1 + (iframe - 1) * dt - analysisWidth / 2; tbegin = tbegin < thy xmin ? thy xmin : tbegin; long istart = Sampled_xToIndex (thee.peek(), tbegin); istart = istart < 1 ? 1 : istart; long iend = istart + nosInWindow - 1; iend = iend > thy nx ? thy nx : iend; for (long i = 1; i <= nosInWindow; i++) { fftbuf[i] = thy z[1][istart + i - 1] * hamming[i]; } for (long i = nosInWindow + 1; i <= nfft; i++) { fftbuf[i] = 0; } NUMfft_forward (&fftTable, fftbuf.peek()); complexfftoutput_to_power (fftbuf.peek(), nfft, spectrum.peek(), true); // log10(|fft|^2) // subtract average double specmean = spectrum[1]; for (long i = 2; i <= nfftdiv2 + 1; i++) { specmean += spectrum[i]; } specmean /= nfftdiv2 + 1; for (long i = 1; i <= nfftdiv2 + 1; i++) { spectrum[i] -= specmean; } /* * Here we diverge from Hillenbrand as he takes the fft of half of the spectral values. * H. forgets that the actual spectrum has nfft/2+1 values. Thefore, we take the inverse * transform because this keeps the number of samples a power of 2. * At the same time this results in twice as much numbers in the quefrency domain, i.e. we end with nfft/2+1 * numbers while H. has only nfft/4! */ fftbuf[1] = spectrum[1]; for (long i = 2; i < nfftdiv2 + 1; i++) { fftbuf[i+i-2] = spectrum[i]; fftbuf[i+i-1] = 0; } fftbuf[nfft] = spectrum[nfftdiv2 + 1]; NUMfft_backward (&fftTable, fftbuf.peek()); for (long i = 1; i <= nfftdiv2 + 1; i++) { his z[i][iframe] = fftbuf[i] * fftbuf[i]; } if ((iframe % 10) == 1) { Melder_progress ((double) iframe / nFrames, L"Cepstrogram analysis of frame ", Melder_integer (iframe), L" out of ", Melder_integer (nFrames), L"."); } } return him.transfer(); } catch (MelderError) { Melder_throw (me, ": no Cepstrogram created."); } }
autoSpectrogram Sound_and_Pitch_to_Spectrogram (Sound me, Pitch thee, double analysisWidth, double dt, double f1_hz, double fmax_hz, double df_hz, double relative_bw) { try { double t1, windowDuration = 2.0 * analysisWidth; /* gaussian window */ double nyquist = 0.5 / my dx, samplingFrequency = 2.0 * nyquist, fmin_hz = 0.0; long numberOfFrames, f0_undefined = 0.0; if (my xmin > thy xmin || my xmax > thy xmax) Melder_throw (U"The domain of the Sound is not included in the domain of the Pitch."); double f0_median = Pitch_getQuantile (thee, thy xmin, thy xmax, 0.5, kPitch_unit_HERTZ); if (f0_median == NUMundefined || f0_median == 0.0) { f0_median = 100.0; Melder_warning (U"Pitch values undefined. Bandwith fixed to 100 Hz. "); } if (f1_hz <= 0.0) { f1_hz = 100.0; } if (fmax_hz <= 0.0) { fmax_hz = nyquist; } if (df_hz <= 0.0) { df_hz = f0_median / 2.0; } if (relative_bw <= 0.0) { relative_bw = 1.1; } fmax_hz = MIN (fmax_hz, nyquist); long numberOfFilters = lround ( (fmax_hz - f1_hz) / df_hz); Sampled_shortTermAnalysis (me, windowDuration, dt, &numberOfFrames, &t1); autoSpectrogram him = Spectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1, fmin_hz, fmax_hz, numberOfFilters, df_hz, f1_hz); // Temporary objects autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency); autoSound window = Sound_createGaussian (windowDuration, samplingFrequency); autoMelderProgress progress (U"Sound & Pitch: To FormantFilter"); for (long iframe = 1; iframe <= numberOfFrames; iframe++) { double t = Sampled_indexToX (him.get(), iframe); double b, f0 = Pitch_getValueAtTime (thee, t, kPitch_unit_HERTZ, 0); if (f0 == NUMundefined || f0 == 0.0) { f0_undefined ++; f0 = f0_median; } b = relative_bw * f0; Sound_into_Sound (me, sframe.get(), t - windowDuration / 2.0); Sounds_multiply (sframe.get(), window.get()); Sound_into_Spectrogram_frame (sframe.get(), him.get(), iframe, b); if (iframe % 10 == 1) { Melder_progress ( (double) iframe / numberOfFrames, U"Frame ", iframe, U" out of ", numberOfFrames, U"."); } } _Spectrogram_windowCorrection (him.get(), window -> nx); return him; } catch (MelderError) { Melder_throw (U"FormantFilter not created from Pitch & FormantFilter."); } }
BarkFilter Sound_to_BarkFilter (Sound me, double analysisWidth, double dt, double f1_bark, double fmax_bark, double df_bark) { try { double t1, nyquist = 0.5 / my dx, samplingFrequency = 2 * nyquist; double windowDuration = 2 * analysisWidth; /* gaussian window */ double zmax = NUMhertzToBark2 (nyquist); double fmin_bark = 0; long nt, frameErrorCount = 0; // Check defaults. if (f1_bark <= 0) { f1_bark = 1; } if (fmax_bark <= 0) { fmax_bark = zmax; } if (df_bark <= 0) { df_bark = 1; } fmax_bark = MIN (fmax_bark, zmax); long nf = floor ( (fmax_bark - f1_bark) / df_bark + 0.5); if (nf <= 0) { Melder_throw ("The combination of filter parameters is not valid."); } Sampled_shortTermAnalysis (me, windowDuration, dt, & nt, & t1); autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency); autoSound window = Sound_createGaussian (windowDuration, samplingFrequency); autoBarkFilter thee = BarkFilter_create (my xmin, my xmax, nt, dt, t1, fmin_bark, fmax_bark, nf, df_bark, f1_bark); autoMelderProgress progess (L"BarkFilter analysis"); for (long i = 1; i <= nt; i++) { double t = Sampled_indexToX (thee.peek(), i); Sound_into_Sound (me, sframe.peek(), t - windowDuration / 2); Sounds_multiply (sframe.peek(), window.peek()); if (! Sound_into_BarkFilter_frame (sframe.peek(), thee.peek(), i)) { frameErrorCount++; } if ( (i % 10) == 1) { Melder_progress ( (double) i / nt, L"BarkFilter analysis: frame ", Melder_integer (i), L" from ", Melder_integer (nt), L"."); therror } } if (frameErrorCount > 0) { Melder_warning (L"Analysis results of ", Melder_integer (frameErrorCount), L" frame(s) out of ", Melder_integer (nt), L" will be suspect."); } double ref = FilterBank_DBREF * gaussian_window_squared_correction (window -> nx); NUMdmatrix_to_dBs (thy z, 1, thy ny, 1, thy nx, ref, FilterBank_DBFAC, FilterBank_DBFLOOR); return thee.transfer(); } catch (MelderError) {
SPINET Sound_to_SPINET (Sound me, double timeStep, double windowDuration, double minimumFrequencyHz, double maximumFrequencyHz, long nFilters, double excitationErbProportion, double inhibitionErbProportion) { try { double firstTime, b = 1.02, samplingFrequency = 1 / my dx; if (timeStep < my dx) { timeStep = my dx; } if (maximumFrequencyHz > samplingFrequency / 2) { maximumFrequencyHz = samplingFrequency / 2; } long numberOfFrames; Sampled_shortTermAnalysis (me, windowDuration, timeStep, &numberOfFrames, &firstTime); autoSPINET thee = SPINET_create (my xmin, my xmax, numberOfFrames, timeStep, firstTime, minimumFrequencyHz, maximumFrequencyHz, nFilters, excitationErbProportion, inhibitionErbProportion); autoSound window = Sound_createGaussian (windowDuration, samplingFrequency); autoSound frame = Sound_createSimple (1, windowDuration, samplingFrequency); autoNUMvector<double> f (1, nFilters); autoNUMvector<double> bw (1, nFilters); autoNUMvector<double> aex (1, nFilters); autoNUMvector<double> ain (1, nFilters); // Cochlear filterbank: gammatone for (long i = 1; i <= nFilters; i++) { f[i] = NUMerbToHertz (thy y1 + (i - 1) * thy dy); bw[i] = 2 * NUMpi * b * (f[i] * (6.23e-6 * f[i] + 93.39e-3) + 28.52); } autoMelderProgress progress (L"SPINET analysis"); for (long i = 1; i <= nFilters; i++) { double bb = (f[i] / 1000) * exp (- f[i] / 1000); // outer & middle ear and phase locking double tgammaMax = (thy gamma - 1) / bw[i]; // Time where gammafunction envelope has maximum double gammaMaxAmplitude = pow ( (thy gamma - 1) / (NUMe * bw[i]), (thy gamma - 1)); // tgammaMax double timeCorrection = tgammaMax - windowDuration / 2; autoSound gammaTone = Sound_createGammaTone (0, 0.1, samplingFrequency, thy gamma, b, f[i], 0, 0, 0); autoSound filtered = Sounds_convolve (me, gammaTone.peek(), kSounds_convolve_scaling_SUM, kSounds_convolve_signalOutsideTimeDomain_ZERO); // To energy measure: weigh with broad-band transfer function for (long j = 1; j <= numberOfFrames; j++) { Sound_into_Sound (filtered.peek(), frame.peek(), Sampled_indexToX (thee.peek(), j) + timeCorrection); Sounds_multiply (frame.peek(), window.peek()); thy y[i][j] = Sound_power (frame.peek()) * bb / gammaMaxAmplitude; } Melder_progress ( (double) i / nFilters, L"SPINET: filter ", Melder_integer (i), L" from ", Melder_integer (nFilters), L"."); } // Excitatory and inhibitory area functions for (long i = 1; i <= nFilters; i++) { for (long k = 1; k <= nFilters; k++) { double fr = (f[k] - f[i]) / bw[i]; aex[i] += fgamma (fr / thy excitationErbProportion, thy gamma); ain[i] += fgamma (fr / thy inhibitionErbProportion, thy gamma); } } // On-center off-surround interactions for (long j = 1; j <= numberOfFrames; j++) for (long i = 1; i <= nFilters; i++) { double a = 0; for (long k = 1; k <= nFilters; k++) { double fr = (f[k] - f[i]) / bw[i]; double hexsq = fgamma (fr / thy excitationErbProportion, thy gamma); double hinsq = fgamma (fr / thy inhibitionErbProportion, thy gamma); a += thy y[k][j] * (hexsq / aex[i] - hinsq / ain[i]); } thy s[i][j] = a > 0 ? a : 0; } return thee.transfer(); } catch (MelderError) { Melder_throw (me, ": no SPINET created."); } }