/*
	Run the minimizer `numberOfTimes` times, each time from a freshly reset start,
	and leave the minimizer holding the parameter vector that reached the lowest minimum.
	Progress feedback is shown only for multi-run invocations; a single run lets
	Minimizer_minimize do its own monitoring. A user interrupt of the progress bar
	stops further tries but still keeps the best result found so far.
*/
void Minimizer_minimizeManyTimes (Minimizer me, long numberOfTimes, long maxIterationsPerTime, double tolerance) {
    const int monitorSingle = ( numberOfTimes == 1 );
    double bestMinimum = my minimum;
    // Keep a copy of the currently best parameter vector.
    autoNUMvector<double> bestParameters (NUMvector_copy<double> (my p, 1, my nParameters), 1);
    if (! monitorSingle)
        Melder_progress (0.0, U"Minimize many times");
    /* on first iteration start with current parameters 27/11/97 */
    for (long itry = 1; itry <= numberOfTimes; itry ++) {
        Minimizer_minimize (me, maxIterationsPerTime, tolerance, monitorSingle);
        Melder_casual (U"Current ", itry, U": minimum = ", my minimum);
        if (my minimum < bestMinimum) {
            // New best: remember both the parameters and the function value.
            NUMvector_copyElements (my p, bestParameters.peek(), 1, my nParameters);
            bestMinimum = my minimum;
        }
        Minimizer_reset (me, nullptr);   // random restart for the next try
        if (! monitorSingle) {
            try {
                Melder_progress ((double) itry / numberOfTimes, itry, U" from ", numberOfTimes);
            } catch (MelderError) {
                Melder_clearError ();   // interrupted, no error
                break;
            }
        }
    }
    if (! monitorSingle)
        Melder_progress (1.0);
    // Put the minimizer at the best solution found over all tries.
    Minimizer_reset (me, bestParameters.peek());
}
/*
	Frame-wise LPC analysis of a Sound.
	The sound is cut into Gaussian-windowed frames of duration 2 * analysisWidth,
	spaced `dt` apart; each frame is analysed into `predictionOrder` coefficients
	with the algorithm selected by `method` (auto/covar/burg/marple).
	tol1/tol2 are only used by the Marple method.
	Throws if the window is too short to determine `predictionOrder` coefficients.
*/
static LPC _Sound_to_LPC (Sound me, int predictionOrder, double analysisWidth, double dt, double preEmphasisFrequency, int method, double tol1, double tol2) {
    double t1, samplingFrequency = 1.0 / my dx;
    double windowDuration = 2 * analysisWidth; /* gaussian window */
    long nFrames, frameErrorCount = 0;
    // A frame must contain at least predictionOrder + 1 samples, otherwise the model is underdetermined.
    if (floor (windowDuration / my dx) < predictionOrder + 1) Melder_throw ("Analysis window duration too short.\n"
        "For a prediction order of ", predictionOrder, " the analysis window duration has to be greater than ", my dx * (predictionOrder + 1),
        "Please increase the analysis window duration or lower the prediction order.");
    // Convenience: analyse the whole sound into one LPC_frame
    if (windowDuration > my dx * my nx) {
        windowDuration = my dx * my nx;
    }
    Sampled_shortTermAnalysis (me, windowDuration, dt, & nFrames, & t1);
    autoSound sound = Data_copy (me);   // work on a copy: pre-emphasis below modifies the samples
    autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
    autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
    autoLPC thee = LPC_create (my xmin, my xmax, nFrames, dt, t1, predictionOrder, my dx);
    autoMelderProgress progress (L"LPC analysis");
    if (preEmphasisFrequency < samplingFrequency / 2) {
        Sound_preEmphasis (sound.peek(), preEmphasisFrequency);
    }
    for (long i = 1; i <= nFrames; i++) {
        LPC_Frame lpcframe = (LPC_Frame) & thy d_frames[i];
        double t = Sampled_indexToX (thee.peek(), i);   // centre time of this frame
        LPC_Frame_init (lpcframe, predictionOrder);
        // Extract the frame, remove DC, then apply the Gaussian window.
        Sound_into_Sound (sound.peek(), sframe.peek(), t - windowDuration / 2);
        Vector_subtractMean (sframe.peek());
        Sounds_multiply (sframe.peek(), window.peek());
        // Dispatch on the chosen LPC algorithm; a false return means the frame failed to converge.
        if (method == LPC_METHOD_AUTO) {
            if (! Sound_into_LPC_Frame_auto (sframe.peek(), lpcframe)) {
                frameErrorCount++;
            }
        } else if (method == LPC_METHOD_COVAR) {
            if (! Sound_into_LPC_Frame_covar (sframe.peek(), lpcframe)) {
                frameErrorCount++;
            }
        } else if (method == LPC_METHOD_BURG) {
            if (! Sound_into_LPC_Frame_burg (sframe.peek(), lpcframe)) {
                frameErrorCount++;
            }
        } else if (method == LPC_METHOD_MARPLE) {
            if (! Sound_into_LPC_Frame_marple (sframe.peek(), lpcframe, tol1, tol2)) {
                frameErrorCount++;
            }
        }
        if ( (i % 10) == 1) {
            Melder_progress ( (double) i / nFrames, L"LPC analysis of frame ", Melder_integer (i), L" out of ", Melder_integer (nFrames), L".");
        }
    }
    // NOTE(review): frameErrorCount is tallied but never reported to the caller or user —
    // consider a Melder_warning as in Sound_to_MelFilter; confirm intended behaviour.
    return thee.transfer();
}
/*
	Long-term average spectrum of the first `maximumHarmonic` harmonics,
	computed period-synchronously: for every pulse whose neighbouring intervals
	both look like plausible glottal periods (length within [shortestPeriod,
	longestPeriod] and left/right ratio within maximumPeriodFactor), one period
	centred on the pulse is extracted and its harmonic energies are accumulated.
	Bins that stay empty end up at -300 dB.
*/
Ltas PointProcess_Sound_to_Ltas_harmonics (PointProcess pulses, Sound sound, long maximumHarmonic, double shortestPeriod, double longestPeriod, double maximumPeriodFactor) {
    try {
        long numberOfPeriods = pulses -> nt - 2;   // every interior pulse can contribute one period
        autoLtas ltas = Ltas_create (maximumHarmonic, 1.0);
        ltas -> xmax = maximumHarmonic;
        if (numberOfPeriods < 1)
            Melder_throw ("There are no periods in the point process.");
        autoMelderProgress progress (L"LTAS (harmonics) analysis...");
        for (long ipulse = 2; ipulse < pulses -> nt; ipulse ++) {
            double leftInterval = pulses -> t [ipulse] - pulses -> t [ipulse - 1];
            double rightInterval = pulses -> t [ipulse + 1] - pulses -> t [ipulse];
            // Ratio of the longer to the shorter neighbouring interval (always >= 1).
            double intervalFactor = leftInterval > rightInterval ? leftInterval / rightInterval : rightInterval / leftInterval;
            Melder_progress ((double) ipulse / pulses -> nt, L"Sound & PointProcess: To Ltas: pulse ",
                Melder_integer (ipulse), L" out of ", Melder_integer (pulses -> nt));
            if (leftInterval >= shortestPeriod && leftInterval <= longestPeriod &&
                rightInterval >= shortestPeriod && rightInterval <= longestPeriod &&
                intervalFactor <= maximumPeriodFactor)
            {
                /*
                 * We have a period! Compute the spectrum.
                 */
                long localMaximumHarmonic;
                autoSound period = Sound_extractPart (sound,
                    pulses -> t [ipulse] - 0.5 * leftInterval, pulses -> t [ipulse] + 0.5 * rightInterval,
                    kSound_windowShape_RECTANGULAR, 1.0, FALSE);
                autoSpectrum spectrum = Sound_to_Spectrum (period.peek(), FALSE);
                // A short period may yield fewer spectral bins than requested harmonics.
                localMaximumHarmonic = maximumHarmonic < spectrum -> nx ? maximumHarmonic : spectrum -> nx;
                for (long iharm = 1; iharm <= localMaximumHarmonic; iharm ++) {
                    double realPart = spectrum -> z [1] [iharm];
                    double imaginaryPart = spectrum -> z [2] [iharm];
                    // Energy in this harmonic bin (factor 2 for the negative frequencies).
                    double energy = (realPart * realPart + imaginaryPart * imaginaryPart) * 2.0 * spectrum -> dx;
                    ltas -> z [1] [iharm] += energy;
                }
            } else {
                numberOfPeriods -= 1;   // this pulse did not yield a usable period
            }
        }
        if (numberOfPeriods < 1)
            Melder_throw (L"There are no periods in the point process.");
        // Convert the accumulated energies to dB (re 4e-10 Pa^2/Hz); empty bins become -300 dB.
        for (long iharm = 1; iharm <= ltas -> nx; iharm ++) {
            if (ltas -> z [1] [iharm] == 0.0) {
                ltas -> z [1] [iharm] = -300.0;
            } else {
                double energyInThisBand = ltas -> z [1] [iharm];
                double powerInThisBand = energyInThisBand / (sound -> xmax - sound -> xmin);
                ltas -> z [1] [iharm] = 10.0 * log10 (powerInThisBand / 4.0e-10);
            }
        }
        return ltas.transfer();
    } catch (MelderError) {
        Melder_throw (sound, " & ", pulses, ": LTAS analysis (harmonics) not performed.");
    }
}
/*
	Mel spectrogram from a Sound: frame-wise analysis with a Gaussian window of
	duration 2 * analysisWidth, frames spaced `dt` apart, followed by a mel
	filter bank (filters df_mel apart between f1_mel and fmax_mel).
	Non-positive or out-of-range filter parameters are replaced by defaults.
*/
autoMelSpectrogram Sound_to_MelSpectrogram (Sound me, double analysisWidth, double dt, double f1_mel, double fmax_mel, double df_mel) {
    try {
        const double samplingFrequency = 1.0 / my dx;
        const double nyquist = 0.5 * samplingFrequency;
        const double windowDuration = 2.0 * analysisWidth;   // Gaussian window
        const double fmin_mel = 0.0;
        const double fbottom = NUMhertzToMel2 (100.0);
        const double fceiling = NUMhertzToMel2 (nyquist);
        /*
            Replace invalid defaults; the order of these checks matters.
        */
        if (fmax_mel <= 0.0 || fmax_mel > fceiling)
            fmax_mel = fceiling;
        if (fmax_mel <= f1_mel) {
            f1_mel = fbottom;
            fmax_mel = fceiling;
        }
        if (f1_mel <= 0.0)
            f1_mel = fbottom;
        if (df_mel <= 0.0)
            df_mel = 100.0;
        /*
            Determine the number of filters and snap fmax_mel onto the filter grid.
        */
        const long numberOfFilters = lround ((fmax_mel - f1_mel) / df_mel);
        fmax_mel = f1_mel + numberOfFilters * df_mel;
        long numberOfFrames;
        double t1;
        Sampled_shortTermAnalysis (me, windowDuration, dt, & numberOfFrames, & t1);
        autoSound analysisFrame = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoSound gaussWindow = Sound_createGaussian (windowDuration, samplingFrequency);
        autoMelSpectrogram result = MelSpectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1,
            fmin_mel, fmax_mel, numberOfFilters, df_mel, f1_mel);
        autoMelderProgress progress (U"MelSpectrograms analysis");
        for (long iframe = 1; iframe <= numberOfFrames; iframe ++) {
            const double midTime = Sampled_indexToX (result.get(), iframe);
            // Cut out the frame around its centre time, window it, and fill one spectrogram column.
            Sound_into_Sound (me, analysisFrame.get(), midTime - windowDuration / 2.0);
            Sounds_multiply (analysisFrame.get(), gaussWindow.get());
            Sound_into_MelSpectrogram_frame (analysisFrame.get(), result.get(), iframe);
            if (iframe % 10 == 1)
                Melder_progress ((double) iframe / numberOfFrames, U"Frame ", iframe, U" out of ", numberOfFrames, U".");
        }
        // Compensate for the energy reduction caused by the Gaussian window.
        _Spectrogram_windowCorrection ((Spectrogram) result.get(), gaussWindow -> nx);
        return result;
    } catch (MelderError) {
        Melder_throw (me, U": no MelSpectrogram created.");
    }
}
/*
	Bark spectrogram from a Sound: frame-wise Gaussian-windowed analysis
	followed by a critical-band (Bark) filter bank.
	Non-positive filter parameters fall back to defaults; the top frequency is
	clipped to the Bark equivalent of the Nyquist frequency.
*/
autoBarkSpectrogram Sound_to_BarkSpectrogram (Sound me, double analysisWidth, double dt, double f1_bark, double fmax_bark, double df_bark) {
    try {
        const double nyquist = 0.5 / my dx;
        const double samplingFrequency = 2 * nyquist;
        const double windowDuration = 2 * analysisWidth;   // Gaussian window
        const double zmax = NUMhertzToBark2 (nyquist);   // highest representable Bark frequency
        const double fmin_bark = 0;
        // Fall back to defaults for out-of-range parameters.
        if (f1_bark <= 0)
            f1_bark = 1;
        if (fmax_bark <= 0)
            fmax_bark = zmax;
        if (df_bark <= 0)
            df_bark = 1;
        fmax_bark = MIN (fmax_bark, zmax);
        const long numberOfFilters = lround ((fmax_bark - f1_bark) / df_bark);
        if (numberOfFilters <= 0)
            Melder_throw (U"The combination of filter parameters is not valid.");
        long numberOfFrames;
        double t1;
        Sampled_shortTermAnalysis (me, windowDuration, dt, & numberOfFrames, & t1);
        autoSound analysisFrame = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoSound gaussWindow = Sound_createGaussian (windowDuration, samplingFrequency);
        autoBarkSpectrogram result = BarkSpectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1,
            fmin_bark, fmax_bark, numberOfFilters, df_bark, f1_bark);
        autoMelderProgress progress (U"BarkSpectrogram analysis");
        for (long iframe = 1; iframe <= numberOfFrames; iframe ++) {
            const double midTime = Sampled_indexToX (result.get(), iframe);
            // Cut out the frame around its centre time, window it, and fill one spectrogram column.
            Sound_into_Sound (me, analysisFrame.get(), midTime - windowDuration / 2.0);
            Sounds_multiply (analysisFrame.get(), gaussWindow.get());
            Sound_into_BarkSpectrogram_frame (analysisFrame.get(), result.get(), iframe);
            if (iframe % 10 == 1)
                Melder_progress ((double) iframe / numberOfFrames, U"BarkSpectrogram analysis: frame ", iframe, U" from ", numberOfFrames, U".");
        }
        // Compensate for the energy reduction caused by the Gaussian window.
        _Spectrogram_windowCorrection ((Spectrogram) result.get(), gaussWindow -> nx);
        return result;
    } catch (MelderError) {
        Melder_throw (me, U": no BarkSpectrogram created.");
    }
}
/*
	Cepstrogram from a Sound: the sound is resampled to 2 * maximumFrequency,
	cut into Gaussian-windowed frames of duration 2 * analysisWidth spaced
	`dt` apart, and each frame's cepstrum fills one column of the result.
*/
Cepstrogram Sound_to_Cepstrogram (Sound me, double analysisWidth, double dt, double maximumFrequency) {
    try {
        double windowDuration = 2 * analysisWidth; /* gaussian window */
        long nFrames;
        // Convenience: analyse the whole sound into one Cepstrogram_frame
        if (windowDuration > my dx * my nx) {
            windowDuration = my dx * my nx;
        }
        double t1, samplingFrequency = 2 * maximumFrequency;
        autoSound sound = Sound_resample (me, samplingFrequency, 50);
        // NOTE(review): frame count/times are derived from the original `me`, while samples are
        // taken from the resampled `sound`; their time domains coincide — presumably intentional.
        Sampled_shortTermAnalysis (me, windowDuration, dt, & nFrames, & t1);
        autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
        double qmin, qmax, dq, q1;
        long nq;
        { // laziness: find out the proper dimensions
            autoSpectrum spec = Sound_to_Spectrum (sframe.peek(), 1);
            autoCepstrum cepstrum = Spectrum_to_Cepstrum (spec.peek());
            qmin = cepstrum -> xmin;
            qmax = cepstrum -> xmax;
            dq = cepstrum -> dx;
            q1 = cepstrum -> x1;
            nq = cepstrum -> nx;
        }
        autoCepstrogram thee = Cepstrogram_create (my xmin, my xmax, nFrames, dt, t1, qmin, qmax, nq, dq, q1);
        autoMelderProgress progress (L"Cepstrogram analysis");
        for (long iframe = 1; iframe <= nFrames; iframe++) {
            double t = Sampled_indexToX (thee.peek(), iframe);   // centre time of this frame
            // Extract the frame, remove DC, window it, and take its cepstrum.
            Sound_into_Sound (sound.peek(), sframe.peek(), t - windowDuration / 2);
            Vector_subtractMean (sframe.peek());
            Sounds_multiply (sframe.peek(), window.peek());
            autoSpectrum spec = Sound_to_Spectrum (sframe.peek(), 1);
            autoCepstrum cepstrum = Spectrum_to_Cepstrum (spec.peek());
            // Copy the cepstrum into column `iframe`.
            for (long i = 1; i <= nq; i++) {
                thy z[i][iframe] = cepstrum -> z[1][i];
            }
            if ((iframe % 10) == 1) {
                Melder_progress ((double) iframe / nFrames, L"Cepstrogram analysis of frame ", Melder_integer (iframe), L" out of ", Melder_integer (nFrames), L".");
            }
        }
        return thee.transfer();
    } catch (MelderError) {
        Melder_throw (me, ": no Cepstrogram created.");
    }
}
/*
	PowerCepstrogram from a Sound. The analysis window is derived from the pitch
	floor (three periods of the lowest expected pitch); the sound is resampled to
	2 * maximumFrequency and pre-emphasized before frame-wise power-cepstrum analysis.
*/
PowerCepstrogram Sound_to_PowerCepstrogram (Sound me, double pitchFloor, double dt, double maximumFrequency, double preEmphasisFrequency) {
    try {
        // minimum analysis window has 3 periods of lowest pitch
        double analysisWidth = 3 / pitchFloor;
        double windowDuration = 2 * analysisWidth; /* gaussian window */
        long nFrames;
        // Convenience: analyse the whole sound into one Cepstrogram_frame
        if (windowDuration > my dx * my nx) {
            windowDuration = my dx * my nx;
        }
        double t1, samplingFrequency = 2 * maximumFrequency;
        autoSound sound = Sound_resample (me, samplingFrequency, 50);
        Sound_preEmphasis (sound.peek(), preEmphasisFrequency);
        // NOTE(review): frame count/times come from the original `me` while samples come from
        // the resampled `sound`; the time domains coincide — presumably intentional.
        Sampled_shortTermAnalysis (me, windowDuration, dt, & nFrames, & t1);
        autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
        // find out the size of the FFT
        long nfft = 2;
        while (nfft < sframe -> nx) nfft *= 2;   // smallest power of two >= frame length
        long nq = nfft / 2 + 1;
        double qmax = 0.5 * nfft / samplingFrequency, dq = qmax / (nq - 1);
        autoPowerCepstrogram thee = PowerCepstrogram_create (my xmin, my xmax, nFrames, dt, t1, 0, qmax, nq, dq, 0);
        autoMelderProgress progress (L"Cepstrogram analysis");
        for (long iframe = 1; iframe <= nFrames; iframe++) {
            double t = Sampled_indexToX (thee.peek(), iframe);   // centre time of this frame
            // Extract the frame, remove DC, window it, and take its power cepstrum.
            Sound_into_Sound (sound.peek(), sframe.peek(), t - windowDuration / 2);
            Vector_subtractMean (sframe.peek());
            Sounds_multiply (sframe.peek(), window.peek());
            autoSpectrum spec = Sound_to_Spectrum (sframe.peek(), 1); // FFT yes
            autoPowerCepstrum cepstrum = Spectrum_to_PowerCepstrum (spec.peek());
            // Copy the power cepstrum into column `iframe`.
            for (long i = 1; i <= nq; i++) {
                thy z[i][iframe] = cepstrum -> z[1][i];
            }
            if ((iframe % 10) == 1) {
                Melder_progress ((double) iframe / nFrames, L"PowerCepstrogram analysis of frame ", Melder_integer (iframe), L" out of ", Melder_integer (nFrames), L".");
            }
        }
        return thee.transfer();
    } catch (MelderError) {
        Melder_throw (me, ": no PowerCepstrogram created.");
    }
}
/*
	Convert an LPC object to a Formant object by finding, per frame, the roots
	of the prediction polynomial. `margin` is the distance from 0 Hz and from
	the Nyquist frequency within which formants are discarded.
	Frames whose root finding fails are counted and reported as suspect.
*/
Formant LPC_to_Formant (LPC me, double margin) {
    try {
        double samplingFrequency = 1.0 / my samplingPeriod;
        long nmax = my maxnCoefficients, err = 0;
        // Higher-order polynomials are slower: update the progress bar every frame,
        // otherwise only every 10th frame.
        long interval = nmax > 20 ? 1 : 10;
        if (nmax > 99) {
            Melder_throw ("We cannot find the roots of a polynomial of order > 99.");
        }
        if (margin >= samplingFrequency / 4) {
            Melder_throw ("Margin must be smaller than ", samplingFrequency / 4, ".");
        }
        // A polynomial of order nmax has at most (nmax + 1) / 2 conjugate root pairs (formants).
        autoFormant thee = Formant_create (my xmin, my xmax, my nx, my dx, my x1, (nmax + 1) / 2);
        autoMelderProgress progress (L"LPC to Formant");
        for (long i = 1; i <= my nx; i++) {
            Formant_Frame formant = & thy d_frames[i];
            LPC_Frame lpc = & my d_frames[i];
            // Initialisation of Formant_Frame is taken care of in Roots_into_Formant_Frame!
            try {
                LPC_Frame_into_Formant_Frame (lpc, formant, my samplingPeriod, margin);
            } catch (MelderError) {
                // A failing frame is not fatal: clear the error and count it as suspect.
                Melder_clearError();
                err++;
            }
            if ( (interval == 1 || (i % interval) == 1)) {
                Melder_progress ( (double) i / my nx, L"LPC to Formant: frame ", Melder_integer (i), L" out of ", Melder_integer (my nx), L".");
            }
        }
        Formant_sort (thee.peek());
        if (err > 0) {
            Melder_warning (Melder_integer (err), L" formant frames out of ", Melder_integer (my nx), L" suspect.");
        }
        return thee.transfer();
    } catch (MelderError) {
        Melder_throw (me, ": no Formant created.");
    }
}
/*
	Compute a FormantFilter from a Sound with pitch-adaptive filter bandwidths:
	each frame's filter bandwidth is `relative_bw` times the local F0 (the median
	F0 is substituted where the pitch is undefined). Frames use a Gaussian window
	of duration 2 * analysisWidth, spaced `dt` apart; filters run from f1_hz to
	fmax_hz in steps of df_hz. Non-positive parameters fall back to defaults.
	Throws if the Sound's time domain is not included in the Pitch's.

	Fixes relative to the previous revision (user-visible strings only):
	- "Bandwith" -> "Bandwidth" in the warning message;
	- the failure message now correctly names the inputs ("Sound & Pitch"
	  instead of "Pitch & FormantFilter").
*/
FormantFilter Sound_and_Pitch_to_FormantFilter (Sound me, Pitch thee, double analysisWidth, double dt,
    double f1_hz, double fmax_hz, double df_hz, double relative_bw)
{
    try {
        double t1, windowDuration = 2 * analysisWidth; /* gaussian window */
        double nyquist = 0.5 / my dx, samplingFrequency = 2 * nyquist, fmin_hz = 0;
        long nt, f0_undefined = 0;
        if (my xmin > thy xmin || my xmax > thy xmax)
            Melder_throw ("The domain of the Sound is not included in the domain of the Pitch.");
        // Median F0 serves as fallback bandwidth source for unvoiced/undefined frames.
        double f0_median = Pitch_getQuantile (thee, thy xmin, thy xmax, 0.5, kPitch_unit_HERTZ);
        if (f0_median == NUMundefined || f0_median == 0) {
            f0_median = 100;
            Melder_warning (L"Pitch values undefined. Bandwidth fixed to 100 Hz. ");
        }
        // Fall back to defaults for non-positive parameters.
        if (f1_hz <= 0)
            f1_hz = 100;
        if (fmax_hz <= 0)
            fmax_hz = nyquist;
        if (df_hz <= 0)
            df_hz = f0_median / 2;
        if (relative_bw <= 0)
            relative_bw = 1.1;
        fmax_hz = MIN (fmax_hz, nyquist);
        long nf = floor ( (fmax_hz - f1_hz) / df_hz + 0.5);
        Sampled_shortTermAnalysis (me, windowDuration, dt, &nt, &t1);
        autoFormantFilter him = FormantFilter_create (my xmin, my xmax, nt, dt, t1,
            fmin_hz, fmax_hz, nf, df_hz, f1_hz);
        // Temporary objects
        autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
        autoMelderProgress progress (L"Sound & Pitch: To FormantFilter");
        for (long i = 1; i <= nt; i++) {
            double t = Sampled_indexToX (him.peek(), i);   // centre time of this frame
            double b, f0 = Pitch_getValueAtTime (thee, t, kPitch_unit_HERTZ, 0);
            if (f0 == NUMundefined || f0 == 0) {
                f0_undefined++;
                f0 = f0_median;   // fall back to the median F0
            }
            b = relative_bw * f0;   // pitch-adaptive filter bandwidth
            Sound_into_Sound (me, sframe.peek(), t - windowDuration / 2);
            Sounds_multiply (sframe.peek(), window.peek());
            Sound_into_FormantFilter_frame (sframe.peek(), him.peek(), i, b);
            if ( (i % 10) == 1) {
                Melder_progress ( (double) i / nt, L"Frame ", Melder_integer (i), L" out of ", Melder_integer (nt), L".");
            }
        }
        // Convert the filter energies to dB, compensating for the Gaussian window.
        double ref = FilterBank_DBREF * gaussian_window_squared_correction (window -> nx);
        NUMdmatrix_to_dBs (his z, 1, his ny, 1, his nx, ref, FilterBank_DBFAC, FilterBank_DBFLOOR);
        return him.transfer();
    } catch (MelderError) {
        Melder_throw ("FormantFilter not created from Sound & Pitch.");
    }
}
/*
	Mel filter-bank analysis of a Sound: Gaussian-windowed frames of duration
	2 * analysisWidth spaced `dt` apart, filtered through a mel filter bank
	(filters df_mel apart between f1_mel and fmax_mel, on the HZTOMEL scale).
	Invalid (<= 0 or out-of-range) filter parameters are replaced by defaults.
	Results are converted to dB with a window-energy correction.
*/
MelFilter Sound_to_MelFilter (Sound me, double analysisWidth, double dt, double f1_mel, double fmax_mel, double df_mel) {
    try {
        double t1, samplingFrequency = 1 / my dx, nyquist = 0.5 * samplingFrequency;
        double windowDuration = 2 * analysisWidth; /* gaussian window */
        double fmin_mel = 0;
        double fbottom = HZTOMEL (100.0), fceiling = HZTOMEL (nyquist);
        long nt, frameErrorCount = 0;
        // Check defaults (the order of these checks matters).
        if (fmax_mel <= 0 || fmax_mel > fceiling) {
            fmax_mel = fceiling;
        }
        if (fmax_mel <= f1_mel) {
            f1_mel = fbottom;
            fmax_mel = fceiling;
        }
        if (f1_mel <= 0) {
            f1_mel = fbottom;
        }
        if (df_mel <= 0) {
            df_mel = 100.0;
        }
        // Determine the number of filters and snap fmax_mel onto the filter grid.
        long nf = floor ( (fmax_mel - f1_mel) / df_mel + 0.5);
        fmax_mel = f1_mel + nf * df_mel;
        Sampled_shortTermAnalysis (me, windowDuration, dt, &nt, &t1);
        autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
        autoMelFilter thee = MelFilter_create (my xmin, my xmax, nt, dt, t1, fmin_mel, fmax_mel, nf, df_mel, f1_mel);
        autoMelderProgress progress (L"MelFilters analysis");
        for (long i = 1; i <= nt; i++) {
            double t = Sampled_indexToX (thee.peek(), i);   // centre time of this frame
            Sound_into_Sound (me, sframe.peek(), t - windowDuration / 2);
            Sounds_multiply (sframe.peek(), window.peek());
            // A false return means this frame's filter analysis failed; keep going and count it.
            if (! Sound_into_MelFilter_frame (sframe.peek(), thee.peek(), i)) {
                frameErrorCount++;
            }
            if ( (i % 10) == 1) {
                Melder_progress ( (double) i / nt, L"Frame ", Melder_integer (i), L" out of ", Melder_integer (nt), L".");
            }
        }
        if (frameErrorCount) Melder_warning (L"Analysis results of ", Melder_integer (frameErrorCount),
            L" frame(s) out of ", Melder_integer (nt), L" will be suspect.");
        // Window correction.
        double ref = FilterBank_DBREF * gaussian_window_squared_correction (window -> nx);
        NUMdmatrix_to_dBs (thy z, 1, thy ny, 1, thy nx, ref, FilterBank_DBFAC, FilterBank_DBFLOOR);
        return thee.transfer();
    } catch (MelderError) {
        Melder_throw (me, ": no MelFilter created.");
    }
}
/*
	SPINET (spatial pitch network) analysis of a Sound:
	1. a gammatone cochlear filter bank produces per-channel power envelopes y[i][j];
	2. excitatory and inhibitory area functions are accumulated per channel;
	3. an on-center/off-surround interaction yields the rectified output s[i][j].
*/
SPINET Sound_to_SPINET (Sound me, double timeStep, double windowDuration, double minimumFrequencyHz, double maximumFrequencyHz, long nFilters, double excitationErbProportion, double inhibitionErbProportion) {
    try {
        double firstTime, b = 1.02, samplingFrequency = 1 / my dx;
        // Clamp the time step and the top frequency to what the sound supports.
        if (timeStep < my dx) {
            timeStep = my dx;
        }
        if (maximumFrequencyHz > samplingFrequency / 2) {
            maximumFrequencyHz = samplingFrequency / 2;
        }
        long numberOfFrames;
        Sampled_shortTermAnalysis (me, windowDuration, timeStep, &numberOfFrames, &firstTime);
        autoSPINET thee = SPINET_create (my xmin, my xmax, numberOfFrames, timeStep, firstTime,
            minimumFrequencyHz, maximumFrequencyHz, nFilters, excitationErbProportion, inhibitionErbProportion);
        autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
        autoSound frame = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoNUMvector<double> f (1, nFilters);     // filter centre frequencies (Hz)
        autoNUMvector<double> bw (1, nFilters);    // filter bandwidths (rad/s)
        autoNUMvector<double> aex (1, nFilters);   // excitatory area function
        autoNUMvector<double> ain (1, nFilters);   // inhibitory area function
        // Cochlear filterbank: gammatone
        for (long i = 1; i <= nFilters; i++) {
            f[i] = NUMerbToHertz (thy y1 + (i - 1) * thy dy);   // channels are equidistant in ERB units
            bw[i] = 2 * NUMpi * b * (f[i] * (6.23e-6 * f[i] + 93.39e-3) + 28.52);
        }
        autoMelderProgress progress (L"SPINET analysis");
        for (long i = 1; i <= nFilters; i++) {
            double bb = (f[i] / 1000) * exp (- f[i] / 1000); // outer & middle ear and phase locking
            double tgammaMax = (thy gamma - 1) / bw[i]; // Time where gammafunction envelope has maximum
            double gammaMaxAmplitude = pow ( (thy gamma - 1) / (NUMe * bw[i]), (thy gamma - 1)); // tgammaMax
            double timeCorrection = tgammaMax - windowDuration / 2;   // align frames on the envelope maximum
            autoSound gammaTone = Sound_createGammaTone (0, 0.1, samplingFrequency, thy gamma, b, f[i], 0, 0, 0);
            autoSound filtered = Sounds_convolve (me, gammaTone.peek(), kSounds_convolve_scaling_SUM, kSounds_convolve_signalOutsideTimeDomain_ZERO);
            // To energy measure: weigh with broad-band transfer function
            for (long j = 1; j <= numberOfFrames; j++) {
                Sound_into_Sound (filtered.peek(), frame.peek(), Sampled_indexToX (thee.peek(), j) + timeCorrection);
                Sounds_multiply (frame.peek(), window.peek());
                thy y[i][j] = Sound_power (frame.peek()) * bb / gammaMaxAmplitude;
            }
            Melder_progress ( (double) i / nFilters, L"SPINET: filter ", Melder_integer (i), L" from ", Melder_integer (nFilters), L".");
        }
        // Excitatory and inhibitory area functions
        for (long i = 1; i <= nFilters; i++) {
            for (long k = 1; k <= nFilters; k++) {
                double fr = (f[k] - f[i]) / bw[i];   // normalized frequency distance between channels
                aex[i] += fgamma (fr / thy excitationErbProportion, thy gamma);
                ain[i] += fgamma (fr / thy inhibitionErbProportion, thy gamma);
            }
        }
        // On-center off-surround interactions
        for (long j = 1; j <= numberOfFrames; j++)
            for (long i = 1; i <= nFilters; i++) {
                double a = 0;
                for (long k = 1; k <= nFilters; k++) {
                    double fr = (f[k] - f[i]) / bw[i];
                    double hexsq = fgamma (fr / thy excitationErbProportion, thy gamma);
                    double hinsq = fgamma (fr / thy inhibitionErbProportion, thy gamma);
                    a += thy y[k][j] * (hexsq / aex[i] - hinsq / ain[i]);
                }
                thy s[i][j] = a > 0 ? a : 0;   // half-wave rectification
            }
        return thee.transfer();
    } catch (MelderError) {
        Melder_throw (me, ": no SPINET created.");
    }
}
/*
	Joint approximate diagonalization of a set of cross-correlation tables
	(qdiag algorithm): iteratively updates the diagonalizer matrix W (my data),
	one column at a time, via eigendecompositions, after first sphering the
	problem with the whitening transform P derived from the first table C0.
	`cweights` weights the contribution of each table; iteration stops when the
	largest column change drops below `delta` or after maxNumberOfIterations.
	The result is written back into `my data` in place.
*/
static void Diagonalizer_and_CrossCorrelationTable_qdiag (Diagonalizer me, CrossCorrelationTables thee, double *cweights, long maxNumberOfIterations, double delta) {
    try {
        CrossCorrelationTable c0 = (CrossCorrelationTable) thy item[1];
        double **w = my data;
        long dimension = c0 -> numberOfColumns;
        autoEigen eigen = Thing_new (Eigen);
        autoCrossCorrelationTables ccts = Data_copy (thee);   // working copies: the originals stay untouched
        autoNUMmatrix<double> pinv (1, dimension, 1, dimension);
        autoNUMmatrix<double> d (1, dimension, 1, dimension);
        autoNUMmatrix<double> p (1, dimension, 1, dimension);
        autoNUMmatrix<double> m1 (1, dimension, 1, dimension);
        autoNUMmatrix<double> wc (1, dimension, 1, dimension);
        autoNUMvector<double> wvec (1, dimension);
        autoNUMvector<double> wnew (1, dimension);
        autoNUMvector<double> mvec (1, dimension);
        for (long i = 1; i <= dimension; i++) // Transpose W
            for (long j = 1; j <= dimension; j++) {
                wc[i][j] = w[j][i];
            }
        // d = diag(diag(W'*C0*W));
        // W = W*d^(-1/2);
        NUMdmatrix_normalizeColumnVectors (wc.peek(), dimension, dimension, c0 -> data); // scale eigenvectors for sphering
        // [vb,db] = eig(C0);
        // P = db^(-1/2)*vb';
        Eigen_initFromSymmetricMatrix (eigen.peek(), c0 -> data, dimension);
        for (long i = 1; i <= dimension; i++) {
            if (eigen -> eigenvalues[i] < 0) {
                Melder_throw (U"Covariance matrix not positive definite, eigenvalue[", i, U"] is negative.");
            }
            double scalef = 1 / sqrt (eigen -> eigenvalues[i]);
            for (long j = 1; j <= dimension; j++) {
                // Store rows in reverse so the whitening matrix P follows ascending eigenvalue order.
                p[dimension - i + 1][j] = scalef * eigen -> eigenvectors[i][j];
            }
        }
        // P*C[i]*P'  — sphere every table with the whitening transform.
        for (long ic = 1; ic <= thy size; ic++) {
            CrossCorrelationTable cov1 = (CrossCorrelationTable) thy item[ic];
            CrossCorrelationTable cov2 = (CrossCorrelationTable) ccts -> item[ic];
            NUMdmatrices_multiply_VCVp (cov2 -> data, p.peek(), dimension, dimension, cov1 -> data, 1);
        }
        // W = P'\W == inv(P') * W
        NUMpseudoInverse (p.peek(), dimension, dimension, pinv.peek(), 0);
        NUMdmatrices_multiply_VpC (w, pinv.peek(), dimension, dimension, wc.peek(), dimension);
        // initialisation for order KN^3
        for (long ic = 2; ic <= thy size; ic++) {
            CrossCorrelationTable cov = (CrossCorrelationTable) ccts -> item[ic];
            // C * W
            NUMdmatrices_multiply_VC (m1.peek(), cov -> data, dimension, dimension, w, dimension);
            // D += scalef * M1*M1'
            NUMdmatrices_multiplyScaleAdd (d.peek(), m1.peek(), dimension, dimension, 2 * cweights[ic]);
        }
        long iter = 0;
        double delta_w;
        autoMelderProgress progress (U"Simultaneous diagonalization of many CrossCorrelationTables...");
        try {
            do {
                // the standard diagonality measure is rather expensive to calculate so we compare the norms of
                // differences of eigenvectors.
                delta_w = 0;
                for (long kol = 1; kol <= dimension; kol++) {
                    for (long i = 1; i <= dimension; i++) {
                        wvec[i] = w[i][kol];
                    }
                    // Remove this column's contribution from D, re-solve, then add the new column back.
                    update_one_column (ccts.peek(), d.peek(), cweights, wvec.peek(), -1, mvec.peek());
                    Eigen_initFromSymmetricMatrix (eigen.peek(), d.peek(), dimension);
                    // Eigenvalues already sorted; get eigenvector of smallest !
                    for (long i = 1; i <= dimension; i++) {
                        wnew[i] = eigen -> eigenvectors[dimension][i];
                    }
                    update_one_column (ccts.peek(), d.peek(), cweights, wnew.peek(), 1, mvec.peek());
                    for (long i = 1; i <= dimension; i++) {
                        w[i][kol] = wnew[i];
                    }
                    // compare norms of eigenvectors. We have to compare ||wvec +/- w_new|| because eigenvectors
                    // may change sign.
                    double normp = 0, normm = 0;
                    for (long j = 1; j <= dimension; j++) {
                        double dm = wvec[j] - wnew[j], dp = wvec[j] + wnew[j];
                        normp += dm * dm;
                        normm += dp * dp;
                    }
                    normp = normp < normm ? normp : normm;
                    normp = sqrt (normp);
                    delta_w = normp > delta_w ? normp : delta_w;   // track the largest column change
                }
                iter++;
                Melder_progress ((double) iter / (double) (maxNumberOfIterations + 1), U"Iteration: ", iter, U", norm: ", delta_w);
            } while (delta_w > delta && iter < maxNumberOfIterations);
        } catch (MelderError) {
            Melder_clearError ();   // a user interrupt just stops the iteration; keep the current W
        }
        // Revert the sphering W = P'*W;
        // Take transpose to make W*C[i]W' diagonal instead of W'*C[i]*W => (P'*W)'=W'*P
        NUMmatrix_copyElements (w, wc.peek(), 1, dimension, 1, dimension);
        NUMdmatrices_multiply_VpC (w, wc.peek(), dimension, dimension, p.peek(), dimension); // W = W'*P: final result
        // Calculate the "real" diagonality measure
        // double dm = CrossCorrelationTables_and_Diagonalizer_getDiagonalityMeasure (thee, me, cweights, 1, thy size);
    } catch (MelderError) {
        Melder_throw (me, U" & ", thee, U": no joint diagonalization (qdiag).");
    }
}
/*
	Cluster the rows of a PatternList into k categories via a KNN-based k-means:
	repeatedly (up to m reseeds) pick k distinct random rows as centroids, then
	iterate classify-and-recenter until the centroids stop moving, accepting the
	result when the smallest/largest cluster-size ratio reaches the constraint s.
	Returns the final classification of all rows.
*/
autoCategories PatternList_to_Categories_cluster (
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////
    PatternList p,      // source
    FeatureWeights fws, // feature weights
    long k,             // k(!)
    double s,           // clustersize constraint 0 < s <= 1
    long m              // reseed maximum
)
{
    autoCategories categories = Categories_createWithSequentialNumbers (k);
    // Trivial case: one row per cluster.
    if (k == p->ny)
        return categories;
    autoKNN knn = KNN_create();
    // If the rows don't divide evenly over k clusters, cap the size constraint
    // at the best achievable min/max ratio.
    if (p -> ny % k)
        if (s > (double) (p -> ny / k) / (double) (p -> ny / k + 1)) // FIXME check whether integer division is correct
            s = (double) (p -> ny / k) / (double) (p -> ny / k + 1);
    double progress = m;   // initial reseed budget, kept for the progress fraction
    autoNUMvector <double> sizes (0L, k);
    autoNUMvector <long> seeds (0L, k);
    autoPatternList centroids = PatternList_create (k, p -> nx);
    autoNUMvector <double> beta (0L, centroids -> nx);
    do {
        double delta;
        long nfriends = 0;   // number of seed rows chosen so far
        Melder_progress (1 - (progress - m) / progress, U"");
        // Seed the k centroids with k distinct random rows of p.
        for (long y = 1; y <= centroids->ny; y++) {
            int ifriend = 1;   // 1 while the candidate row collides with an earlier seed
            long ys = (long) lround(NUMrandomUniform(1, p->ny));
            if (nfriends) {
                while (ifriend) {
                    ys = (long) lround(NUMrandomUniform(1, p->ny));
                    for (long fc = 0; fc < nfriends; fc++) {
                        ifriend = 0;
                        Melder_assert (fc < k);
                        if (seeds [fc] == ys) {
                            ifriend = 1;
                            break;
                        }
                    }
                }
            }
            Melder_assert (nfriends <= k);
            seeds [nfriends++] = ys;
            for (long x = 1; x <= centroids->nx; x++)
                centroids->z[y][x] = p->z[ys][x];
        }
        // Inner k-means loop: classify all rows, recenter each centroid on its
        // members (incremental mean), repeat until no centroid moves.
        do {
            delta = 0;
            KNN_learn (knn.get(), centroids.get(), categories.get(), kOla_REPLACE, kOla_SEQUENTIAL);
            autoCategories interim = KNN_classifyToCategories (knn.get(), p, fws, 1, kOla_FLAT_VOTING);
            for (long x = 1; x <= k; x ++)
                sizes [x] = 0;
            for (long yp = 1; yp <= categories->size; yp ++) {
                double alfa = 1;   // running member count for the incremental mean
                Melder_assert (yp <= centroids -> ny);
                for (long x = 1; x <= centroids -> nx; x ++) {
                    beta [x] = centroids -> z [yp] [x];   // remember the old centroid
                }
                for (long ys = 1; ys <= interim->size; ys ++) {
                    if (FeatureWeights_areFriends (categories->at [yp], interim->at [ys])) {
                        for (long x = 1; x <= p -> nx; x ++) {
                            Melder_assert (ys <= p -> ny);
                            if (alfa == 1) {
                                centroids -> z [yp] [x] = p -> z [ys] [x];
                            } else {
                                // Incremental mean: mean += (sample - mean) / count.
                                centroids -> z [yp] [x] += (p -> z [ys] [x] - centroids -> z [yp] [x]) / alfa;
                            }
                        }
                        Melder_assert (yp <= k);
                        sizes [yp] ++;
                        alfa ++;
                    }
                }
                for (long x = 1; x <= centroids -> nx; x ++) {
                    delta += fabs (beta [x] - centroids -> z [yp] [x]);   // total centroid movement
                }
            }
        } while (delta != 0.0);
        // sizes[0] holds the min/max cluster-size ratio for the constraint test below.
        double smax = sizes [1];
        double smin = sizes [1];
        for (long x = 1; x <= k; x++) {
            if (smax < sizes [x]) smax = sizes [x];
            if (smin > sizes [x]) smin = sizes [x];
        }
        sizes [0] = smin / smax;
        -- m;
    } while (sizes[0] < s && m > 0);
    autoCategories output = KNN_classifyToCategories (knn.get(), p, fws, 1, kOla_FLAT_VOTING);
    return output;
}
/*
	Robust re-estimation of LPC coefficients with Huber weighting: for every
	frame of the input LPC `thee`, the corresponding frame of Sound `me` is
	re-analysed with iteratively reweighted least squares (parameters k, tol,
	itermax) and written into a copy of `thee`, which is returned.
	The Sound and LPC must share time domain and sampling interval.
	struct_huber is a manually managed workspace: it is destroyed on both the
	success and the failure path.
*/
LPC LPC_and_Sound_to_LPC_robust (LPC thee, Sound me, double analysisWidth, double preEmphasisFrequency, double k,
    int itermax, double tol, int wantlocation)
{
    struct huber_struct struct_huber = { 0 };
    try {
        double t1, samplingFrequency = 1.0 / my dx, tol_svd = 0.000001;
        double location = 0, windowDuration = 2 * analysisWidth; /* Gaussian window */
        long nFrames, frameErrorCount = 0, iter = 0;
        long p = thy maxnCoefficients;
        // The Sound and the LPC must describe the same signal.
        if (my xmin != thy xmin || my xmax != thy xmax) {
            Melder_throw ("Time domains differ.");
        }
        if (my dx != thy samplingPeriod) {
            Melder_throw ("Sampling intervals differ.");
        }
        if (floor (windowDuration / my dx) < p + 1) {
            Melder_throw ("Analysis window too short.");
        }
        Sampled_shortTermAnalysis (me, windowDuration, thy dx, & nFrames, & t1);
        // The derived frame layout must match the input LPC exactly.
        if (nFrames != thy nx || t1 != thy x1) {
            Melder_throw ("Incorrect retrieved analysis width");
        }
        autoSound sound = Data_copy (me);   // work on a copy: pre-emphasis modifies the samples
        autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
        autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
        autoLPC him = Data_copy (thee);   // output: robust coefficients overwrite this copy
        huber_struct_init (&struct_huber, windowDuration, p, samplingFrequency, location, wantlocation);
        struct_huber.k = k;
        struct_huber.tol = tol;
        struct_huber.tol_svd = tol_svd;
        struct_huber.itermax = itermax;
        autoMelderProgress progess (L"LPC analysis");   // [sic] local name misspelled; RAII progress guard
        Sound_preEmphasis (sound.peek(), preEmphasisFrequency);
        for (long i = 1; i <= nFrames; i++) {
            LPC_Frame lpc = (LPC_Frame) & thy d_frames[i];
            LPC_Frame lpcto = (LPC_Frame) & his d_frames[i];
            double t = Sampled_indexToX (thee, i);   // centre time of this frame
            Sound_into_Sound (sound.peek(), sframe.peek(), t - windowDuration / 2);
            Vector_subtractMean (sframe.peek());
            Sounds_multiply (sframe.peek(), window.peek());
            try {
                LPC_Frames_and_Sound_huber (lpc, sframe.peek(), lpcto, & struct_huber);
            } catch (MelderError) {
                // A frame that fails to optimise is not fatal; it is counted and reported below.
                frameErrorCount++;
            }
            iter += struct_huber.iter;   // accumulate for the average-iterations report
            if ( (i % 10) == 1) {
                Melder_progress ( (double) i / nFrames, L"LPC analysis of frame ", Melder_integer (i), L" out of ",
                    Melder_integer (nFrames), L".");
            }
        }
        if (frameErrorCount) Melder_warning (L"Results of ", Melder_integer (frameErrorCount),
            L" frame(s) out of ", Melder_integer (nFrames), L" could not be optimised.");
        MelderInfo_writeLine4 (L"Number of iterations: ", Melder_integer (iter),
            L"\n Average per frame: ", Melder_double (((double) iter) / nFrames));
        huber_struct_destroy (&struct_huber);
        return him.transfer();
    } catch (MelderError) {
        huber_struct_destroy (&struct_huber);   // also release the workspace on failure
        Melder_throw (me, ": no robust LPC created.");
    }
}
/*
	Pitch-synchronous long-term average spectrum.
	Every glottal pulse whose left and right periods pass the period criteria
	contributes the spectrum of one period (from half the left interval to half
	the right interval around the pulse) to the LTAS bins; the summed energies
	are then converted to a power density in dB/Hz and empty bins are filled by
	interpolation from their nearest defined neighbours.

	pulses                glottal pulse times (first and last pulse have no full period, hence nt - 2)
	sound                 the sound the periods are cut from
	maximumFrequency      upper frequency bound of the Ltas
	bandWidth             width of one Ltas bin
	shortestPeriod, longestPeriod, maximumPeriodFactor
	                      criteria a left/right interval pair must satisfy to count as a period

	Throws (MelderError) when no period at all qualifies.
*/
Ltas PointProcess_Sound_to_Ltas (PointProcess pulses, Sound sound, double maximumFrequency, double bandWidth,
	double shortestPeriod, double longestPeriod, double maximumPeriodFactor)
{
	try {
		long numberOfPeriods = pulses -> nt - 2, totalNumberOfEnergies = 0;
		autoLtas ltas = Ltas_create (maximumFrequency / bandWidth, bandWidth);
		ltas -> xmax = maximumFrequency;
		autoLtas numbers = Data_copy (ltas.peek());   // parallel structure: per-bin contribution counts
		if (numberOfPeriods < 1)
			Melder_throw ("Cannot compute an Ltas if there are no periods in the point process.");
		autoMelderProgress progress (L"Ltas analysis...");
		for (long ipulse = 2; ipulse < pulses -> nt; ipulse ++) {
			double leftInterval = pulses -> t [ipulse] - pulses -> t [ipulse - 1];
			double rightInterval = pulses -> t [ipulse + 1] - pulses -> t [ipulse];
			double intervalFactor = leftInterval > rightInterval ? leftInterval / rightInterval : rightInterval / leftInterval;
			Melder_progress ((double) ipulse / pulses -> nt, L"Sound & PointProcess: To Ltas: pulse ",
				Melder_integer (ipulse), L" out of ", Melder_integer (pulses -> nt));
			if (leftInterval >= shortestPeriod && leftInterval <= longestPeriod &&
				rightInterval >= shortestPeriod && rightInterval <= longestPeriod &&
				intervalFactor <= maximumPeriodFactor)
			{
				/*
				 * We have a period! Compute the spectrum.
				 */
				autoSound period = Sound_extractPart (sound,
					pulses -> t [ipulse] - 0.5 * leftInterval, pulses -> t [ipulse] + 0.5 * rightInterval,
					kSound_windowShape_RECTANGULAR, 1.0, FALSE);
				autoSpectrum spectrum = Sound_to_Spectrum (period.peek(), FALSE);
				for (long ifreq = 1; ifreq <= spectrum -> nx; ifreq ++) {
					double frequency = spectrum -> xmin + (ifreq - 1) * spectrum -> dx;
					double realPart = spectrum -> z [1] [ifreq];
					double imaginaryPart = spectrum -> z [2] [ifreq];
					/* Energy in Pa^2·s for this spectral sample. */
					double energy = (realPart * realPart + imaginaryPart * imaginaryPart) * 2.0 * spectrum -> dx /* OLD: * sound -> nx */;
					long iband = ceil (frequency / bandWidth);   // frequency 0 maps to band 0 and is skipped below
					if (iband >= 1 && iband <= ltas -> nx) {
						ltas -> z [1] [iband] += energy;
						numbers -> z [1] [iband] += 1;
						totalNumberOfEnergies += 1;
					}
				}
			} else {
				numberOfPeriods -= 1;   // this pulse did not yield a usable period
			}
		}
		if (numberOfPeriods < 1)
			Melder_throw ("There are no periods in the point process.");
		/* Convert summed energies to dB power densities; mark untouched bins undefined. */
		for (long iband = 1; iband <= ltas -> nx; iband ++) {
			if (numbers -> z [1] [iband] == 0.0) {
				ltas -> z [1] [iband] = NUMundefined;
			} else {
				/*
				 * Each bin now contains a total energy in Pa2 sec.
				 * To convert this to power density, we
				 */
				double totalEnergyInThisBand = ltas -> z [1] [iband];
				if (0 /* i.e. if you just want to have a spectrum of the voiced parts... */) {
					double energyDensityInThisBand = totalEnergyInThisBand / ltas -> dx;
					double powerDensityInThisBand = energyDensityInThisBand / (sound -> xmax - sound -> xmin);
					ltas -> z [1] [iband] = 10.0 * log10 (powerDensityInThisBand / 4.0e-10);
				} else {
					/*
					 * And this is what we really want. The total energy has to be redistributed.
					 */
					double meanEnergyInThisBand = totalEnergyInThisBand / numbers -> z [1] [iband];
					double meanNumberOfEnergiesPerBand = (double) totalNumberOfEnergies / ltas -> nx;
					double redistributedEnergyInThisBand = meanEnergyInThisBand * meanNumberOfEnergiesPerBand;
					double redistributedEnergyDensityInThisBand = redistributedEnergyInThisBand / ltas -> dx;
					double redistributedPowerDensityInThisBand = redistributedEnergyDensityInThisBand / (sound -> xmax - sound -> xmin);
					ltas -> z [1] [iband] = 10.0 * log10 (redistributedPowerDensityInThisBand / 4.0e-10);
					/* OLD: ltas -> z [1] [iband] = 10.0 * log10 (ltas -> z [1] [iband] / numbers -> z [1] [iband] * sound -> nx);*/
				}
			}
		}
		/* Fill undefined bins by linear interpolation between the nearest defined neighbours
		   (or plain copy when a neighbour exists on one side only). */
		for (long iband = 1; iband <= ltas -> nx; iband ++) {
			if (ltas -> z [1] [iband] == NUMundefined) {
				long ibandleft = iband - 1, ibandright = iband + 1;
				while (ibandleft >= 1 && ltas -> z [1] [ibandleft] == NUMundefined) ibandleft --;
				while (ibandright <= ltas -> nx && ltas -> z [1] [ibandright] == NUMundefined) ibandright ++;
				if (ibandleft < 1 && ibandright > ltas -> nx)
					Melder_throw ("Cannot create an Ltas without energy in any bins.");
				if (ibandleft < 1) {
					ltas -> z [1] [iband] = ltas -> z [1] [ibandright];
				} else if (ibandright > ltas -> nx) {
					ltas -> z [1] [iband] = ltas -> z [1] [ibandleft];
				} else {
					double frequency = ltas -> x1 + (iband - 1) * ltas -> dx;
					double fleft = ltas -> x1 + (ibandleft - 1) * ltas -> dx;
					double fright = ltas -> x1 + (ibandright - 1) * ltas -> dx;
					ltas -> z [1] [iband] = ((fright - frequency) * ltas -> z [1] [ibandleft]
						+ (frequency - fleft) * ltas -> z [1] [ibandright]) / (fright - fleft);
				}
			}
		}
		return ltas.transfer();
	} catch (MelderError) {
		Melder_throw (sound, " & ", pulses, ": LTAS analysis not performed.");
	}
}
/*
	Power cepstrogram following Hillenbrand's recipe:
	optional downsampling (above 30 kHz), fixed 0.9 pre-emphasis, Hamming-windowed
	frames, mean-subtracted log power spectrum, and an inverse FFT back to the
	quefrency domain; the squared quefrency samples form the cepstrogram columns.

	me            the sound to analyse
	minimumPitch  lowest pitch of interest; the window holds 3 periods of it
	dt            time step between successive frames

	Throws (MelderError) when the window ends up shorter than 8 samples.
*/
PowerCepstrogram Sound_to_PowerCepstrogram_hillenbrand (Sound me, double minimumPitch, double dt) {
	try {
		// minimum analysis window has 3 periods of lowest pitch
		double analysisWidth = 3 / minimumPitch;
		if (analysisWidth > my dx * my nx) {
			analysisWidth = my dx * my nx;   // clip to the whole sound
		}
		double t1, samplingFrequency = 1 / my dx;
		autoSound thee;
		if (samplingFrequency > 30000) {
			samplingFrequency = samplingFrequency / 2;
			thee.reset (Sound_resample (me, samplingFrequency, 1));
		} else {
			thee.reset (Data_copy (me));   // copy: pre-emphasis below modifies samples in place
		}
		// pre-emphasis with fixed coefficient 0.9 (backwards, so each step reads the original previous sample)
		for (long i = thy nx; i > 1; i--) {
			thy z[1][i] -= 0.9 * thy z[1][i - 1];
		}
		long nosInWindow = analysisWidth * samplingFrequency, nFrames;   // number of samples in a window
		if (nosInWindow < 8) {
			Melder_throw ("Analysis window too short.");
		}
		Sampled_shortTermAnalysis (thee.peek(), analysisWidth, dt, & nFrames, & t1);
		autoNUMvector<double> hamming (1, nosInWindow);
		for (long i = 1; i <= nosInWindow; i++) {
			hamming[i] = 0.54 -0.46 * cos(2 * NUMpi * (i - 1) / (nosInWindow - 1));
		}
		long nfft = 8; // minimum possible
		while (nfft < nosInWindow) { nfft *= 2; }   // round window size up to a power of two
		long nfftdiv2 = nfft / 2;
		autoNUMvector<double> fftbuf (1, nfft); // "complex" array
		autoNUMvector<double> spectrum (1, nfftdiv2 + 1); // +1 needed
		autoNUMfft_Table fftTable;
		NUMfft_Table_init (&fftTable, nfft); // sound to spectrum
		double qmax = 0.5 * nfft / samplingFrequency, dq = qmax / (nfftdiv2 + 1);
		autoPowerCepstrogram him = PowerCepstrogram_create (my xmin, my xmax, nFrames, dt, t1, 0, qmax, nfftdiv2+1, dq, 0);
		autoMelderProgress progress (L"Cepstrogram analysis");
		for (long iframe = 1; iframe <= nFrames; iframe++) {
			/* Clamp the window to the sound's sample range. */
			double tbegin = t1 + (iframe - 1) * dt - analysisWidth / 2;
			tbegin = tbegin < thy xmin ? thy xmin : tbegin;
			long istart = Sampled_xToIndex (thee.peek(), tbegin);
			istart = istart < 1 ? 1 : istart;
			long iend = istart + nosInWindow - 1;
			iend = iend > thy nx ? thy nx : iend;
			for (long i = 1; i <= nosInWindow; i++) {
				fftbuf[i] = thy z[1][istart + i - 1] * hamming[i];
			}
			for (long i = nosInWindow + 1; i <= nfft; i++) {
				fftbuf[i] = 0;   // zero-pad to the FFT length
			}
			NUMfft_forward (&fftTable, fftbuf.peek());
			complexfftoutput_to_power (fftbuf.peek(), nfft, spectrum.peek(), true); // log10(|fft|^2)
			// subtract average
			double specmean = spectrum[1];
			for (long i = 2; i <= nfftdiv2 + 1; i++) {
				specmean += spectrum[i];
			}
			specmean /= nfftdiv2 + 1;
			for (long i = 1; i <= nfftdiv2 + 1; i++) {
				spectrum[i] -= specmean;
			}
			/*
			 * Here we diverge from Hillenbrand as he takes the fft of half of the spectral values.
			 * H. forgets that the actual spectrum has nfft/2+1 values. Therefore, we take the inverse
			 * transform because this keeps the number of samples a power of 2.
			 * At the same time this results in twice as much numbers in the quefrency domain, i.e. we end with nfft/2+1
			 * numbers while H. has only nfft/4!
			 */
			/* Repack the real spectrum into the interleaved layout NUMfft_backward expects. */
			fftbuf[1] = spectrum[1];
			for (long i = 2; i < nfftdiv2 + 1; i++) {
				fftbuf[i+i-2] = spectrum[i];
				fftbuf[i+i-1] = 0;
			}
			fftbuf[nfft] = spectrum[nfftdiv2 + 1];
			NUMfft_backward (&fftTable, fftbuf.peek());
			for (long i = 1; i <= nfftdiv2 + 1; i++) {
				his z[i][iframe] = fftbuf[i] * fftbuf[i];   // power cepstrum
			}
			if ((iframe % 10) == 1) {   // update progress only every 10th frame
				Melder_progress ((double) iframe / nFrames, L"Cepstrogram analysis of frame ",
					Melder_integer (iframe), L" out of ", Melder_integer (nFrames), L".");
			}
		}
		return him.transfer();
	} catch (MelderError) {
		Melder_throw (me, ": no Cepstrogram created.");
	}
}
/*
	Spectrogram of a Sound with pitch-adaptive analysis bandwidth.
	For each frame the filter bandwidth is `relative_bw` times the local pitch
	(median pitch when the frame's pitch is undefined); the Gaussian-windowed
	frame is analysed into the Spectrogram by Sound_into_Spectrogram_frame.

	me             the sound to analyse (its domain must lie within the Pitch's)
	thee           the pitch contour steering the per-frame bandwidth
	analysisWidth  effective analysis width; the Gaussian window spans twice this
	dt             time step between frames
	f1_hz, fmax_hz, df_hz
	               first filter frequency, top frequency and frequency step
	               (<= 0 selects a default; fmax is clipped to the Nyquist)
	relative_bw    bandwidth as a fraction of the local pitch (<= 0 selects 1.1)

	Throws (MelderError) when the Sound's domain is not inside the Pitch's domain.

	FIXED (review): the catch message claimed "FormantFilter not created from
	Pitch & FormantFilter" and the progress title said "To FormantFilter" —
	copy-paste leftovers; this function takes a Sound and a Pitch and creates a
	Spectrogram. Also "Bandwith" -> "Bandwidth" in the warning, and
	f0_undefined (a long) was initialized with the double literal 0.0.
*/
autoSpectrogram Sound_and_Pitch_to_Spectrogram (Sound me, Pitch thee, double analysisWidth, double dt,
	double f1_hz, double fmax_hz, double df_hz, double relative_bw)
{
	try {
		double t1, windowDuration = 2.0 * analysisWidth;   /* Gaussian window. */
		double nyquist = 0.5 / my dx, samplingFrequency = 2.0 * nyquist, fmin_hz = 0.0;
		long numberOfFrames, f0_undefined = 0;   // counts frames that fell back to the median pitch
		if (my xmin > thy xmin || my xmax > thy xmax)
			Melder_throw (U"The domain of the Sound is not included in the domain of the Pitch.");
		double f0_median = Pitch_getQuantile (thee, thy xmin, thy xmax, 0.5, kPitch_unit_HERTZ);
		if (f0_median == NUMundefined || f0_median == 0.0) {
			f0_median = 100.0;
			Melder_warning (U"Pitch values undefined. Bandwidth fixed to 100 Hz. ");
		}
		/* Defaults for non-positive layout parameters. */
		if (f1_hz <= 0.0) {
			f1_hz = 100.0;
		}
		if (fmax_hz <= 0.0) {
			fmax_hz = nyquist;
		}
		if (df_hz <= 0.0) {
			df_hz = f0_median / 2.0;
		}
		if (relative_bw <= 0.0) {
			relative_bw = 1.1;
		}
		fmax_hz = MIN (fmax_hz, nyquist);
		long numberOfFilters = lround ( (fmax_hz - f1_hz) / df_hz);
		Sampled_shortTermAnalysis (me, windowDuration, dt, &numberOfFrames, &t1);
		autoSpectrogram him = Spectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1,
			fmin_hz, fmax_hz, numberOfFilters, df_hz, f1_hz);
		// Temporary objects
		autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
		autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
		autoMelderProgress progress (U"Sound & Pitch: To Spectrogram");
		for (long iframe = 1; iframe <= numberOfFrames; iframe++) {
			double t = Sampled_indexToX (him.get(), iframe);
			double b, f0 = Pitch_getValueAtTime (thee, t, kPitch_unit_HERTZ, 0);
			if (f0 == NUMundefined || f0 == 0.0) {
				f0_undefined ++;
				f0 = f0_median;   // fall back to the global median
			}
			b = relative_bw * f0;   // pitch-adaptive bandwidth
			Sound_into_Sound (me, sframe.get(), t - windowDuration / 2.0);
			Sounds_multiply (sframe.get(), window.get());
			Sound_into_Spectrogram_frame (sframe.get(), him.get(), iframe, b);
			if (iframe % 10 == 1) {   // update progress only every 10th frame
				Melder_progress ( (double) iframe / numberOfFrames, U"Frame ", iframe,
					U" out of ", numberOfFrames, U".");
			}
		}
		/* Compensate for the energy loss caused by the Gaussian window. */
		_Spectrogram_windowCorrection (him.get(), window -> nx);
		return him;
	} catch (MelderError) {
		Melder_throw (U"Spectrogram not created from Sound & Pitch.");
	}
}
/*
	Bark-scale filter-bank analysis of a Sound: Gaussian-windowed frames are
	passed through Sound_into_BarkFilter_frame and the resulting filter
	energies are converted to dB with a window-squared correction.

	me            the sound to analyse
	analysisWidth effective analysis width; the Gaussian window spans twice this
	dt            time step between frames
	f1_bark, fmax_bark, df_bark
	              first filter position, top frequency and step, all in Bark
	              (<= 0 selects a default; fmax is clipped to the Nyquist in Bark)

	NOTE(review): this excerpt is truncated — the body of the final catch
	handler is not visible in this chunk; verify the full function in context.
	NOTE(review): `therror` below looks like a leftover of the pre-exception
	Praat error macro — confirm it still expands to something meaningful here.
*/
BarkFilter Sound_to_BarkFilter (Sound me, double analysisWidth, double dt, double f1_bark, double fmax_bark, double df_bark) {
	try {
		double t1, nyquist = 0.5 / my dx, samplingFrequency = 2 * nyquist;
		double windowDuration = 2 * analysisWidth; /* gaussian window */
		double zmax = NUMhertzToBark2 (nyquist);
		double fmin_bark = 0;
		long nt, frameErrorCount = 0;
		// Check defaults.
		if (f1_bark <= 0) {
			f1_bark = 1;
		}
		if (fmax_bark <= 0) {
			fmax_bark = zmax;
		}
		if (df_bark <= 0) {
			df_bark = 1;
		}
		fmax_bark = MIN (fmax_bark, zmax);
		long nf = floor ( (fmax_bark - f1_bark) / df_bark + 0.5);   // rounded number of filters
		if (nf <= 0) {
			Melder_throw ("The combination of filter parameters is not valid.");
		}
		Sampled_shortTermAnalysis (me, windowDuration, dt, & nt, & t1);
		autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
		autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
		autoBarkFilter thee = BarkFilter_create (my xmin, my xmax, nt, dt, t1,
			fmin_bark, fmax_bark, nf, df_bark, f1_bark);
		autoMelderProgress progess (L"BarkFilter analysis");
		for (long i = 1; i <= nt; i++) {
			double t = Sampled_indexToX (thee.peek(), i);
			Sound_into_Sound (me, sframe.peek(), t - windowDuration / 2);
			Sounds_multiply (sframe.peek(), window.peek());
			if (! Sound_into_BarkFilter_frame (sframe.peek(), thee.peek(), i)) {
				frameErrorCount++;   // frame failed; keep going and warn afterwards
			}
			if ( (i % 10) == 1) {   // update progress only every 10th frame
				Melder_progress ( (double) i / nt, L"BarkFilter analysis: frame ",
					Melder_integer (i), L" from ", Melder_integer (nt), L"."); therror
			}
		}
		if (frameErrorCount > 0) {
			Melder_warning (L"Analysis results of ", Melder_integer (frameErrorCount),
				L" frame(s) out of ", Melder_integer (nt), L" will be suspect.");
		}
		/* Convert the filter energies to dB, correcting for the Gaussian window. */
		double ref = FilterBank_DBREF * gaussian_window_squared_correction (window -> nx);
		NUMdmatrix_to_dBs (thy z, 1, thy ny, 1, thy nx, ref, FilterBank_DBFAC, FilterBank_DBFLOOR);
		return thee.transfer();
	} catch (MelderError) {
/*
	Core formant analysis: Gaussian-windowed frames analysed either by
	Burg's method (which == 1) or by split-Levinson (which == 2).
	NOTE: modifies `me` in place (pre-emphasis) — callers are expected to pass
	a private copy.

	dt_in                 time step; <= 0 selects a default of a quarter window
	numberOfPoles         LPC order; (numberOfPoles + 1) / 2 formants maximum
	halfdt_window         half the physical window duration
	which                 1 = Burg, 2 = split-Levinson
	preemphasisFrequency  pre-emphasis applied to `me` before analysis
	safetyMargin          passed to burg() only

	Throws (MelderError) when the window holds fewer than numberOfPoles + 1
	samples or the sound contains infinities.
*/
static autoFormant Sound_to_Formant_any_inline (Sound me, double dt_in, int numberOfPoles, double halfdt_window,
	int which, double preemphasisFrequency, double safetyMargin)
{
	double dt = dt_in > 0.0 ? dt_in : halfdt_window / 4.0;
	double duration = my nx * my dx, t1;
	double dt_window = 2.0 * halfdt_window;
	long nFrames = 1 + (long) floor ((duration - dt_window) / dt);
	long nsamp_window = (long) floor (dt_window / my dx), halfnsamp_window = nsamp_window / 2;
	if (nsamp_window < numberOfPoles + 1)
		Melder_throw (U"Window too short.");
	t1 = my x1 + 0.5 * (duration - my dx - (nFrames - 1) * dt); // centre of first frame
	if (nFrames < 1) {
		/* Sound shorter than one window: analyse it as a single frame. */
		nFrames = 1;
		t1 = my x1 + 0.5 * duration;
		dt_window = duration;
		nsamp_window = my nx;
	}
	autoFormant thee = Formant_create (my xmin, my xmax, nFrames, dt, t1, (numberOfPoles + 1) / 2);   // e.g. 11 poles -> maximally 6 formants
	autoNUMvector <double> window (1, nsamp_window);
	autoNUMvector <double> frame (1, nsamp_window);
	autoNUMvector <double> cof (1, numberOfPoles);   // superfluous if which==2, but nobody uses that anyway
	autoMelderProgress progress (U"Formant analysis...");
	/* Pre-emphasis. */
	Sound_preEmphasis (me, preemphasisFrequency);
	/* Gaussian window. */
	for (long i = 1; i <= nsamp_window; i ++) {
		double imid = 0.5 * (nsamp_window + 1), edge = exp (-12.0);
		window [i] = (exp (-48.0 * (i - imid) * (i - imid) / (nsamp_window + 1) / (nsamp_window + 1)) - edge) / (1.0 - edge);
	}
	for (long iframe = 1; iframe <= nFrames; iframe ++) {
		double t = Sampled_indexToX (thee.peek(), iframe);
		long leftSample = Sampled_xToLowIndex (me, t);
		long rightSample = leftSample + 1;
		long startSample = rightSample - halfnsamp_window;
		long endSample = leftSample + halfnsamp_window;
		double maximumIntensity = 0.0;
		if (startSample < 1) startSample = 1;       // clamp the window to the sound
		if (endSample > my nx) endSample = my nx;
		/* The frame's intensity is the peak squared sample value inside the window. */
		for (long i = startSample; i <= endSample; i ++) {
			double value = Sampled_getValueAtSample (me, i, Sound_LEVEL_MONO, 0);
			if (value * value > maximumIntensity) {
				maximumIntensity = value * value;
			}
		}
		if (maximumIntensity == HUGE_VAL)
			Melder_throw (U"Sound contains infinities.");
		thy d_frames [iframe]. intensity = maximumIntensity;
		if (maximumIntensity == 0.0) continue;   // Burg cannot stand all zeroes
		/* Copy a pre-emphasized window to a frame. */
		for (long j = 1, i = startSample; j <= nsamp_window; j ++)
			frame [j] = Sampled_getValueAtSample (me, i ++, Sound_LEVEL_MONO, 0) * window [j];
		if (which == 1) {
			burg (frame.peek(), endSample - startSample + 1, cof.peek(), numberOfPoles,
				& thy d_frames [iframe], 0.5 / my dx, safetyMargin);
		} else if (which == 2) {
			if (! splitLevinson (frame.peek(), endSample - startSample + 1, numberOfPoles,
				& thy d_frames [iframe], 0.5 / my dx))
			{
				/* Non-fatal: report and continue with the remaining frames. */
				Melder_clearError ();
				Melder_casual (U"(Sound_to_Formant:)"
					U" Analysis results of frame ", iframe,
					U" will be wrong."
				);
			}
		}
		Melder_progress ((double) iframe / (double) nFrames, U"Formant analysis: frame ", iframe);
	}
	Formant_sort (thee.peek());
	return thee;
}
/* This routine is modeled after qdiag.m from Andreas Ziehe, Pavel Laskov, Guido Nolte, Klaus-Robert Müller,
	A Fast Algorithm for Joint Diagonalization with Non-orthogonal Transformations and its Application to
	Blind Source Separation, Journal of Machine Learning Research 5 (2004), 777–800.
*/
/*
	Iteratively updates the Diagonalizer `me` so that it (approximately) jointly
	diagonalizes all tables in `thee`. Each pass computes an update matrix W
	(identity plus small off-diagonal terms), rescales it when its row-sum norm
	exceeds theta (to keep the multiplicative update stable), applies it to the
	accumulated transform V and to every working cross-correlation table, and
	stops when the relative change of the diagonality measure drops below
	`delta` or `maxNumberOfIterations` is reached.

	An error inside the iteration loop (e.g. a user interrupt of the progress
	bar) is swallowed deliberately: the best transform found so far is kept.
*/
static void Diagonalizer_and_CrossCorrelationTables_ffdiag (Diagonalizer me, CrossCorrelationTables thee,
	long maxNumberOfIterations, double delta)
{
	try {
		long iter = 0, dimension = my numberOfRows;
		double **v = my data;   // the transform being refined, updated in place
		autoCrossCorrelationTables ccts = CrossCorrelationTables_and_Diagonalizer_diagonalize (thee, me);
		autoNUMmatrix<double> w (1, dimension, 1, dimension);
		autoNUMmatrix<double> vnew (1, dimension, 1, dimension);
		autoNUMmatrix<double> cc (1, dimension, 1, dimension);
		for (long i = 1; i <= dimension; i++) {
			w[i][i] = 1;   // W starts as the identity; off-diagonals are filled each pass
		}
		autoMelderProgress progress (U"Simultaneous diagonalization of many CrossCorrelationTables...");
		double dm_new = CrossCorrelationTables_getDiagonalityMeasure (ccts.peek(), nullptr, 0, 0);
		try {
			double dm_old, theta = 1, dm_start = dm_new;
			do {
				dm_old = dm_new;
				/* Solve the 2x2 systems that give the off-diagonal entries of W. */
				for (long i = 1; i <= dimension; i++) {
					for (long j = i + 1; j <= dimension; j++) {
						double zii = 0, zij = 0, zjj = 0, yij = 0, yji = 0;   // zij = zji
						for (long k = 1; k <= ccts -> size; k++) {
							CrossCorrelationTable ct = (CrossCorrelationTable) ccts -> item [k];
							zii += ct -> data[i][i] * ct -> data[i][i];
							zij += ct -> data[i][i] * ct -> data[j][j];
							zjj += ct -> data[j][j] * ct -> data[j][j];
							yij += ct -> data[j][j] * ct -> data[i][j];
							yji += ct -> data[i][i] * ct -> data[i][j];
						}
						double denom = zjj * zii - zij * zij;
						if (denom != 0) {
							w[i][j] = (zij * yji - zii * yij) / denom;
							w[j][i] = (zij * yij - zjj * yji) / denom;
						}
					}
				}
				/* Maximum absolute row sum of the off-diagonal part of W. */
				double norma = 0;
				for (long i = 1; i <= dimension; i++) {
					double normai = 0;
					for (long j = 1; j <= dimension; j++) {
						if (i != j) {
							normai += fabs (w[i][j]);
						}
					}
					if (normai > norma) {
						norma = normai;
					}
				}
				// evaluate the norm
				if (norma > theta) {
					/* Rescale the off-diagonal part to Frobenius norm theta, keeping the update small. */
					double normf = 0;
					for (long i = 1; i <= dimension; i++)
						for (long j = 1; j <= dimension; j++)
							if (i != j) {
								normf += w[i][j] * w[i][j];
							}
					double scalef = theta / sqrt (normf);
					for (long i = 1; i <= dimension; i++) {
						for (long j = 1; j <= dimension; j++) {
							if (i != j) {
								w[i][j] *= scalef;
							}
						}
					}
				}
				// update V
				NUMmatrix_copyElements (v, vnew.peek(), 1, dimension, 1, dimension);
				NUMdmatrices_multiply_VC (v, w.peek(), dimension, dimension, vnew.peek(), dimension);
				/* Apply the same update to every working table: C := W C W'. */
				for (long k = 1; k <= ccts -> size; k++) {
					CrossCorrelationTable ct = (CrossCorrelationTable) ccts -> item[k];
					NUMmatrix_copyElements (ct -> data, cc.peek(), 1, dimension, 1, dimension);
					NUMdmatrices_multiply_VCVp (ct -> data, w.peek(), dimension, dimension, cc.peek(), 1);
				}
				dm_new = CrossCorrelationTables_getDiagonalityMeasure (ccts.peek(), 0, 0, 0);
				iter++;
				Melder_progress ((double) iter / (double) maxNumberOfIterations, U"Iteration: ", iter,
					U", measure: ", dm_new, U"\n fractional measure: ", dm_new / dm_start);
			} while (fabs ((dm_old - dm_new) / dm_new) > delta && iter < maxNumberOfIterations);
		} catch (MelderError) {
			Melder_clearError ();   // interrupted: keep the result obtained so far
		}
	} catch (MelderError) {
		Melder_throw (me, U" & ", thee, U": no joint diagonalization (ffdiag).");
	}
}
/*
	Enhances the modulation depth of a sound inside the band [flow, fhigh].
	Per channel: the out-of-band part is kept unchanged; the band is walked in
	one-Bark steps, each sub-band's slow-minus-fast filtered dB contour becomes
	a gain (clipped to `maximumFactor`), and the amplified sub-bands are summed
	back onto the out-of-band signal. The result is rescaled to 0.99 peak.

	enhancement_dB              maximum enhancement, converted to a linear factor
	flow, fhigh                 frequency band to process (Hz)
	slowModulation, fastModulation
	                            modulation frequencies defining the intensity band-pass
	bandSmoothing               skirt width of the Hann band filters
*/
Sound Sound_deepenBandModulation (Sound me, double enhancement_dB, double flow, double fhigh,
	double slowModulation, double fastModulation, double bandSmoothing)
{
	try {
		autoSound thee = Data_copy (me);
		double maximumFactor = pow (10, enhancement_dB / 20), alpha = sqrt (log (2.0));
		double alphaslow = alpha / slowModulation, alphafast = alpha / fastModulation;
		for (long channel = 1; channel <= my ny; channel ++) {
			autoSound channelSound = Sound_extractChannel (me, channel);
			autoSpectrum orgspec = Sound_to_Spectrum (channelSound.peek(), true);
			/*
			 * Keep the part of the sound that is outside the filter bank.
			 */
			autoSpectrum spec = Data_copy (orgspec.peek());
			Spectrum_stopHannBand (spec.peek(), flow, fhigh, bandSmoothing);
			autoSound filtered = Spectrum_to_Sound (spec.peek());
			long n = thy nx;
			double *amp = thy z [channel];   // `n`/`amp` are re-aimed at several buffers below
			for (long i = 1; i <= n; i ++) amp [i] = filtered -> z [1] [i];
			autoMelderProgress progress (U"Deepen band modulation...");
			double fmin = flow;
			while (fmin < fhigh) {
				/*
				 * Take a one-bark frequency band.
				 */
				double fmid_bark = NUMhertzToBark (fmin) + 0.5, ceiling;
				double fmax = NUMbarkToHertz (NUMhertzToBark (fmin) + 1);
				if (fmax > fhigh) fmax = fhigh;
				Melder_progress (fmin / fhigh, U"Band: ", Melder_fixed (fmin, 0), U" ... ",
					Melder_fixed (fmax, 0), U" Hz");
				NUMmatrix_copyElements (orgspec -> z, spec -> z, 1, 2, 1, spec -> nx);
				Spectrum_passHannBand (spec.peek(), fmin, fmax, bandSmoothing);
				autoSound band = Spectrum_to_Sound (spec.peek());
				/*
				 * Compute a relative intensity contour.
				 */
				autoSound intensity = Data_copy (band.peek());
				n = intensity -> nx;
				amp = intensity -> z [1];
				for (long i = 1; i <= n; i ++) amp [i] = 10 * log10 (amp [i] * amp [i] + 1e-6);   // dB; 1e-6 avoids log(0)
				autoSpectrum intensityFilter = Sound_to_Spectrum (intensity.peek(), true);
				n = intensityFilter -> nx;
				for (long i = 1; i <= n; i ++) {
					/* Band-pass the dB contour between the fast and slow modulation frequencies. */
					double frequency = intensityFilter -> x1 + (i - 1) * intensityFilter -> dx;
					double slow = alphaslow * frequency, fast = alphafast * frequency;
					double factor = exp (- fast * fast) - exp (- slow * slow);
					intensityFilter -> z [1] [i] *= factor;
					intensityFilter -> z [2] [i] *= factor;
				}
				intensity.reset (Spectrum_to_Sound (intensityFilter.peek()));
				n = intensity -> nx;
				amp = intensity -> z [1];
				for (long i = 1; i <= n; i ++) amp [i] = pow (10, amp [i] / 2);   // dB contour back to a linear gain
				/*
				 * Clip to maximum enhancement.
				 */
				ceiling = 1 + (maximumFactor - 1.0) * (0.5 - 0.5 * cos (NUMpi * fmid_bark / 13));
				for (long i = 1; i <= n; i ++) amp [i] = 1 / (1 / amp [i] + 1 / ceiling);   // soft clip (harmonic mean with ceiling)
				n = thy nx;
				amp = thy z [channel];
				for (long i = 1; i <= n; i ++) amp [i] += band -> z [1] [i] * intensity -> z [1] [i];
				fmin = fmax;
			}
		}
		Vector_scale (thee.peek(), 0.99);
		/* Truncate. */
		thy xmin = my xmin; thy xmax = my xmax;
		thy nx = my nx; thy x1 = my x1;
		return thee.transfer();
	} catch (MelderError) {
		Melder_throw (me, U": band modulation not deepened.");
	}
}
/*
	Short-term spectral analysis of a Sound into a Spectrogram.
	The time and frequency steps are the larger of the user-requested minima
	and the steps implied by the effective analysis width and the oversampling
	limits. Each frame is windowed (several window shapes supported), FFT'd,
	converted to a power spectrum (averaged over channels for multi-channel
	sounds), and binned into the spectrogram's frequency bands.

	effectiveAnalysisWidth     effective window duration; a Gaussian window
	                           physically spans twice this
	fmax                       top frequency; <= 0 or > Nyquist selects the Nyquist
	minimumTimeStep1, minimumFreqStep1
	                           user-requested lower bounds on the steps
	windowType                 window shape (square/Hamming/Bartlett/Welch/Hanning/Gaussian)
	maximumTimeOversampling, maximumFreqOversampling
	                           caps on oversampling relative to the effective widths

	Returns an empty autoSpectrogram when no frequency bin fits below fmax.
	Throws (MelderError) when the window is shorter than two samples or longer
	than the sound.

	FIXED (review): the per-frame clearing loop reset spec[1..half_nsampFFT]
	only, while the Nyquist bin spec[half_nsampFFT + 1] is accumulated with +=
	for every frame — so it carried energy over from all previous frames. The
	channel-averaging loop skipped that bin as well. Both loops now cover
	half_nsampFFT + 1. Also removed an unreachable `break;` after the GAUSSIAN
	case block.
*/
autoSpectrogram Sound_to_Spectrogram (Sound me, double effectiveAnalysisWidth, double fmax,
	double minimumTimeStep1, double minimumFreqStep1, enum kSound_to_Spectrogram_windowShape windowType,
	double maximumTimeOversampling, double maximumFreqOversampling)
{
	try {
		double nyquist = 0.5 / my dx;
		double physicalAnalysisWidth =
			windowType == kSound_to_Spectrogram_windowShape_GAUSSIAN ? 2 * effectiveAnalysisWidth : effectiveAnalysisWidth;
		double effectiveTimeWidth = effectiveAnalysisWidth / sqrt (NUMpi);
		double effectiveFreqWidth = 1 / effectiveTimeWidth;
		double minimumTimeStep2 = effectiveTimeWidth / maximumTimeOversampling;
		double minimumFreqStep2 = effectiveFreqWidth / maximumFreqOversampling;
		double timeStep = minimumTimeStep1 > minimumTimeStep2 ? minimumTimeStep1 : minimumTimeStep2;
		double freqStep = minimumFreqStep1 > minimumFreqStep2 ? minimumFreqStep1 : minimumFreqStep2;
		double duration = my dx * (double) my nx, windowssq = 0.0;
		/*
		 * Compute the time sampling.
		 */
		long nsamp_window = (long) floor (physicalAnalysisWidth / my dx);
		long halfnsamp_window = nsamp_window / 2 - 1;
		nsamp_window = halfnsamp_window * 2;   // force an even window length
		if (nsamp_window < 1)
			Melder_throw (U"Your analysis window is too short: less than two samples.");
		if (physicalAnalysisWidth > duration)
			Melder_throw (U"Your sound is too short:\n"
				U"it should be at least as long as ",
				windowType == kSound_to_Spectrogram_windowShape_GAUSSIAN ? U"two window lengths." : U"one window length.");
		long numberOfTimes = 1 + (long) floor ((duration - physicalAnalysisWidth) / timeStep);   // >= 1
		double t1 = my x1 + 0.5 * ((double) (my nx - 1) * my dx
			- (double) (numberOfTimes - 1) * timeStep);   /* Centre of first frame. */
		/*
		 * Compute the frequency sampling of the FFT spectrum.
		 */
		if (fmax <= 0.0 || fmax > nyquist) fmax = nyquist;
		long numberOfFreqs = (long) floor (fmax / freqStep);
		if (numberOfFreqs < 1) return autoSpectrogram ();
		long nsampFFT = 1;
		while (nsampFFT < nsamp_window || nsampFFT < 2 * numberOfFreqs * (nyquist / fmax))
			nsampFFT *= 2;
		long half_nsampFFT = nsampFFT / 2;
		/*
		 * Compute the frequency sampling of the spectrogram.
		 */
		long binWidth_samples = (long) floor (freqStep * my dx * nsampFFT);
		if (binWidth_samples < 1) binWidth_samples = 1;
		double binWidth_hertz = 1.0 / (my dx * nsampFFT);
		freqStep = binWidth_samples * binWidth_hertz;   // snap the step to a whole number of FFT bins
		numberOfFreqs = (long) floor (fmax / freqStep);
		if (numberOfFreqs < 1) return autoSpectrogram ();
		autoSpectrogram thee = Spectrogram_create (my xmin, my xmax, numberOfTimes, timeStep, t1,
			0.0, fmax, numberOfFreqs, freqStep, 0.5 * (freqStep - binWidth_hertz));
		autoNUMvector <double> frame (1, nsampFFT);
		autoNUMvector <double> spec (1, nsampFFT);
		autoNUMvector <double> window (1, nsamp_window);
		autoNUMfft_Table fftTable;
		NUMfft_Table_init (& fftTable, nsampFFT);
		autoMelderProgress progress (U"Sound to Spectrogram...");
		/* Build the analysis window and accumulate its energy for normalization. */
		for (long i = 1; i <= nsamp_window; i ++) {
			double nSamplesPerWindow_f = physicalAnalysisWidth / my dx;
			double phase = (double) i / nSamplesPerWindow_f;   // 0 .. 1
			double value;
			switch (windowType) {
				case kSound_to_Spectrogram_windowShape_SQUARE:
					value = 1.0;
					break;
				case kSound_to_Spectrogram_windowShape_HAMMING:
					value = 0.54 - 0.46 * cos (2.0 * NUMpi * phase);
					break;
				case kSound_to_Spectrogram_windowShape_BARTLETT:
					value = 1.0 - fabs ((2.0 * phase - 1.0));
					break;
				case kSound_to_Spectrogram_windowShape_WELCH:
					value = 1.0 - (2.0 * phase - 1.0) * (2.0 * phase - 1.0);
					break;
				case kSound_to_Spectrogram_windowShape_HANNING:
					value = 0.5 * (1.0 - cos (2.0 * NUMpi * phase));
					break;
				case kSound_to_Spectrogram_windowShape_GAUSSIAN:
				{
					double imid = 0.5 * (double) (nsamp_window + 1), edge = exp (-12.0);
					phase = ((double) i - imid) / nSamplesPerWindow_f;   /* -0.5 .. +0.5 */
					value = (exp (-48.0 * phase * phase) - edge) / (1.0 - edge);
					break;
				}
				default:
					value = 1.0;
			}
			window [i] = (float) value;   // NOTE(review): float truncation kept for bit-compatibility with old results
			windowssq += value * value;
		}
		double oneByBinWidth = 1.0 / windowssq / binWidth_samples;
		for (long iframe = 1; iframe <= numberOfTimes; iframe ++) {
			double t = Sampled_indexToX (thee.peek(), iframe);
			long leftSample = Sampled_xToLowIndex (me, t), rightSample = leftSample + 1;
			long startSample = rightSample - halfnsamp_window;
			long endSample = leftSample + halfnsamp_window;
			Melder_assert (startSample >= 1);
			Melder_assert (endSample <= my nx);
			for (long i = 1; i <= half_nsampFFT + 1; i ++) {   // include the Nyquist bin (bug fix)
				spec [i] = 0.0;
			}
			for (long channel = 1; channel <= my ny; channel ++) {
				for (long j = 1, i = startSample; j <= nsamp_window; j ++) {
					frame [j] = my z [channel] [i ++] * window [j];
				}
				for (long j = nsamp_window + 1; j <= nsampFFT; j ++) frame [j] = 0.0f;
				Melder_progress (iframe / (numberOfTimes + 1.0),
					U"Sound to Spectrogram: analysis of frame ", iframe, U" out of ", numberOfTimes);
				/* Compute Fast Fourier Transform of the frame. */
				NUMfft_forward (& fftTable, frame.peek());   // complex spectrum
				/* Put power spectrum in frame [1..half_nsampFFT + 1]. */
				spec [1] += frame [1] * frame [1];   // DC component
				for (long i = 2; i <= half_nsampFFT; i ++)
					spec [i] += frame [i + i - 2] * frame [i + i - 2] + frame [i + i - 1] * frame [i + i - 1];
				spec [half_nsampFFT + 1] += frame [nsampFFT] * frame [nsampFFT];   // Nyquist frequency
			}
			if (my ny > 1 ) for (long i = 1; i <= half_nsampFFT + 1; i ++) {   // include the Nyquist bin (bug fix)
				spec [i] /= my ny;   // average power over the channels
			}
			/* Bin into frame [1..nBands]. */
			for (long iband = 1; iband <= numberOfFreqs; iband ++) {
				long leftsample = (iband - 1) * binWidth_samples + 1, rightsample = leftsample + binWidth_samples;
				float power = 0.0f;
				for (long i = leftsample; i < rightsample; i ++) power += spec [i];
				thy z [iband] [iframe] = power * oneByBinWidth;
			}
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": spectrogram analysis not performed.");
	}
}
/*
	Tribolet-style adaptive phase unwrapping of a Spectrum.
	Row 1 of the returned Matrix holds the squared magnitude, row 2 the
	unwrapped phase (with the linear phase trend removed when
	remove_linear_part is set). The phase derivative needed by the adaptive
	integration is obtained from the spectrum of the time-weighted signal
	(i.e. the derivative theorem: F{t·x(t)} relates to dX/df).

	Requires my nx to be a power of two plus one; throws (MelderError) otherwise.
*/
autoMatrix Spectrum_unwrap (Spectrum me) {
	try {
		struct tribolet_struct tbs;
		int remove_linear_part = 1;
		long nfft = 2;
		while (nfft < my nx - 1) { nfft *= 2; }
		nfft *= 2;
		if (nfft / 2 != my nx - 1) {
			Melder_throw (U"Dimension of Spectrum is not (power of 2 - 1).");
		}
		autoSound x = Spectrum_to_Sound (me);
		autoSound nx = Data_copy (x.get());
		for (long i = 1; i <= x -> nx; i++) {
			nx -> z[1][i] *= (i - 1);   // multiply by the sample index: time-weighted signal for d/df
		}
		autoSpectrum snx = Sound_to_Spectrum (nx.get(), 1);
		autoMatrix thee = Matrix_create (my xmin, my xmax, my nx, my dx, my x1, 1, 2, 2, 1, 1);
		// Common variables.
		tbs.thlinc = THLINC;
		tbs.thlcon = THLCON;
		tbs.x = x -> z[1];
		tbs.nx = x -> nx;
		tbs.l = (long) floor (pow (2, EXP2) + 0.1);
		tbs.ddf = NUM2pi / ( (tbs.l) * nfft);
		tbs.reverse_sign = my z[1][1] < 0;
		tbs.count = 0;
		// Reuse snx : put phase derivative (d/df) in imaginary part.
		tbs.dvtmn2 = 0;
		for (long i = 1; i <= my nx; i ++) {
			double xr = my z[1][i], xi = my z[2][i];
			double nxr = snx -> z[1][i], nxi = snx -> z[2][i];
			double xmsq = xr * xr + xi * xi;
			double pdvt = PHADVT (xr, xi, nxr, nxi, xmsq);
			thy z[1][i] = xmsq;   // row 1: squared magnitude
			snx -> z[2][i] = pdvt;
			tbs.dvtmn2 += pdvt;
		}
		/* Trapezoid-style mean of the phase derivative (endpoints at half weight). */
		tbs.dvtmn2 = (2 * tbs.dvtmn2 - snx -> z[2][1] - snx -> z[2][my nx]) / (my nx - 1);
		autoMelderProgress progress (U"Phase unwrapping");
		double pphase = 0, phase = 0;
		double ppdvt = snx -> z[2][1];
		thy z[2][1] = PPVPHA (my z[1][1], my z[2][1], tbs.reverse_sign);   // principal value of the first phase
		for (long i = 2; i <= my nx; i ++) {
			double pfreq = NUM2pi * (i - 1) / nfft;
			double pdvt = snx -> z[2][i];
			double ppv = PPVPHA (my z[1][i], my z[2][i], tbs.reverse_sign);
			phase = phase_unwrap (&tbs, pfreq, ppv, pdvt, &pphase, &ppdvt);
			ppdvt = pdvt;
			thy z[2][i] = pphase = phase;
			Melder_progress ( (double) i / my nx, i, U" unwrapped phases from ", my nx, U".");
		}
		long iphase = (long) floor (phase / NUMpi + 0.1);   // ppgb: better than truncation toward zero
		if (remove_linear_part) {
			phase /= my nx - 1;   // slope of the linear phase trend
			for (long i = 2; i <= my nx; i ++) {
				thy z[2][i] -= phase * (i - 1);
			}
		}
		Melder_information (U"Number of spectral values: ", tbs.count);
		Melder_information (U" iphase = ", iphase);
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": not unwrapped.");
	}
}