Example #1
void Sound_playPart (Sound me, double tmin, double tmax,
	int (*callback) (void *closure, int phase, double tmin, double tmax, double t), void *closure)
{
	try {
		long ifsamp = lround (1.0 / my dx), bestSampleRate = MelderAudio_getOutputBestSampleRate (ifsamp);
		if (ifsamp == bestSampleRate) {
			struct SoundPlay *thee = (struct SoundPlay *) & thePlayingSound;
			double *fromLeft = my z [1], *fromRight = my ny > 1 ? my z [2] : NULL;
			MelderAudio_stopPlaying (MelderAudio_IMPLICIT);
			long i1, i2;
			if ((thy numberOfSamples = Matrix_getWindowSamplesX (me, tmin, tmax, & i1, & i2)) < 1) return;
			thy tmin = tmin;
			thy tmax = tmax;
			thy dt = my dx;
			thy t1 = my x1;
			thy callback = callback;
			thy closure = closure;
			thy silenceBefore = (long) (ifsamp * MelderAudio_getOutputSilenceBefore ());
			thy silenceAfter = (long) (ifsamp * MelderAudio_getOutputSilenceAfter ());
			int numberOfChannels = my ny;
			NUMvector_free (thy buffer, 1);   // just in case
			thy buffer = NUMvector <short> (1, (i2 - i1 + 1 + thy silenceBefore + thy silenceAfter) * numberOfChannels);
			thy i1 = i1;
			thy i2 = i2;
			short *to = thy buffer + thy silenceBefore * numberOfChannels;
			if (numberOfChannels > 2) {
				for (long i = i1; i <= i2; i ++) {
					for (long chan = 1; chan <= my ny; chan ++) {
						long value = (long) round (my z [chan] [i] * 32768.0);
						* ++ to = value < -32768 ? -32768 : value > 32767 ? 32767 : value;
					}
				}
			} else if (numberOfChannels == 2) {
				for (long i = i1; i <= i2; i ++) {
					long valueLeft = (long) round (fromLeft [i] * 32768.0);
					* ++ to = valueLeft < -32768 ? -32768 : valueLeft > 32767 ? 32767 : valueLeft;
					long valueRight = (long) round (fromRight [i] * 32768.0);
					* ++ to = valueRight < -32768 ? -32768 : valueRight > 32767 ? 32767 : valueRight;
				}
			} else {
				for (long i = i1; i <= i2; i ++) {
					long value = (long) round (fromLeft [i] * 32768.0);
					* ++ to = value < -32768 ? -32768 : value > 32767 ? 32767 : value;
				}
			}
			if (thy callback) thy callback (thy closure, 1, tmin, tmax, tmin);
			MelderAudio_play16 (thy buffer + 1, ifsamp,
				thy silenceBefore + thy numberOfSamples + thy silenceAfter, numberOfChannels, melderPlayCallback, thee);
		} else {
			autoSound resampled = Sound_resample (me, bestSampleRate, 1);
			Sound_playPart (resampled.peek(), tmin, tmax, callback, closure);   // recursively
		}
	} catch (MelderError) {
		Melder_throw (me, U": not played.");
	}
}
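A minimal usage sketch (hypothetical, not part of the example above; `snd` stands for an already existing Sound): play the stretch between 0.5 s and 1.5 s without progress notifications, which the signature allows via a NULL callback and closure.

static void demo_playPart (Sound snd) {
	// No per-phase callback: just play the selected part.
	Sound_playPart (snd, 0.5, 1.5, NULL, NULL);
}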
Example #2
autoFormant Sound_to_Formant_any (Sound me, double dt, int numberOfPoles, double maximumFrequency,
	double halfdt_window, int which, double preemphasisFrequency, double safetyMargin)
{
	double nyquist = 0.5 / my dx;
	autoSound sound;
	if (maximumFrequency <= 0.0 || fabs (maximumFrequency / nyquist - 1) < 1.0e-12) {
		sound = Data_copy (me);   // will be modified
	} else {
		sound = Sound_resample (me, maximumFrequency * 2, 50);
	}
	return Sound_to_Formant_any_inline (sound.peek(), dt, numberOfPoles, halfdt_window, which, preemphasisFrequency, safetyMargin);
}
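A hedged calling sketch (the parameter values are illustrative assumptions; in particular the coding of the `which` LPC-method selector is not visible in this excerpt):

static autoFormant demo_formants (Sound snd) {
	// 0.01 s time step, 10 poles (about 5 formants) up to 5500 Hz,
	// 25 ms half window, pre-emphasis from 50 Hz, 50 Hz safety margin;
	// which = 1 is assumed to select one of the LPC variants.
	return Sound_to_Formant_any (snd, 0.01, 10, 5500.0, 0.025, 1, 50.0, 50.0);
}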
Example #3
autoSpectrum Spectrum_resample (Spectrum me, long numberOfFrequencies) {
	try {
		double newSamplingFrequency = (1 / my dx) * numberOfFrequencies / my nx;
		// resample both the real and the imaginary part!
		autoSound thee = Sound_resample ((Sound) me, newSamplingFrequency, 50);
		autoSpectrum him = Spectrum_create (my xmax, numberOfFrequencies);
		NUMmatrix_copyElements<double> (thy z, his z, 1, 2, 1, numberOfFrequencies);
		return him;
	} catch (MelderError) {
		Melder_throw (me, U": not resampled.");
	}
}
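Why this new sampling frequency is the right one, written out as a sketch (assuming Sound_resample preserves the x-domain, i.e. the "duration"):

/*
	The Spectrum is reinterpreted as a Sound with nx samples at rate 1/dx,
	so its "duration" is nx * dx. Resampling at the new rate yields
		(1/dx) * (numberOfFrequencies / nx) * (nx * dx) = numberOfFrequencies
	samples: exactly the requested number of bins over the unchanged
	frequency range [0, xmax].
*/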
Example #4
PowerCepstrogram Sound_to_PowerCepstrogram (Sound me, double pitchFloor, double dt, double maximumFrequency, double preEmphasisFrequency) {
	try {
		// minimum analysis window has 3 periods of lowest pitch
		double analysisWidth = 3.0 / pitchFloor;
		double windowDuration = 2 * analysisWidth; /* Gaussian window */
		long nFrames;

		// Convenience: analyse the whole sound into one Cepstrogram_frame
		if (windowDuration > my dx * my nx) {
			windowDuration = my dx * my nx;
		}
		double t1, samplingFrequency = 2 * maximumFrequency;
		autoSound sound = Sound_resample (me, samplingFrequency, 50);
		Sound_preEmphasis (sound.peek(), preEmphasisFrequency);
		Sampled_shortTermAnalysis (me, windowDuration, dt, & nFrames, & t1);
		autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
		autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
		// find out the size of the FFT
		long nfft = 2;
		while (nfft < sframe -> nx) nfft *= 2;
		long nq = nfft / 2 + 1;
		double qmax = 0.5 * nfft / samplingFrequency, dq = qmax / (nq - 1);
		autoPowerCepstrogram thee = PowerCepstrogram_create (my xmin, my xmax, nFrames, dt, t1, 0, qmax, nq, dq, 0);

		autoMelderProgress progress (L"Cepstrogram analysis");

		for (long iframe = 1; iframe <= nFrames; iframe++) {
			double t = Sampled_indexToX (thee.peek(), iframe);
			Sound_into_Sound (sound.peek(), sframe.peek(), t - windowDuration / 2);
			Vector_subtractMean (sframe.peek());
			Sounds_multiply (sframe.peek(), window.peek());
			autoSpectrum spec = Sound_to_Spectrum (sframe.peek(), 1); // FFT yes
			autoPowerCepstrum cepstrum = Spectrum_to_PowerCepstrum (spec.peek());
			for (long i = 1; i <= nq; i++) {
				thy z[i][iframe] = cepstrum -> z[1][i];
			}
			if ((iframe % 10) == 1) {
				Melder_progress ((double) iframe / nFrames, L"PowerCepstrogram analysis of frame ",
					Melder_integer (iframe), L" out of ", Melder_integer (nFrames), L".");
			}
		}
		return thee.transfer();
	} catch (MelderError) {
		Melder_throw (me, ": no PowerCepstrogram created.");
	}
}
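A worked numeric instance of the FFT bookkeeping above (the input values are illustrative assumptions only):

/*
	pitchFloor = 60 Hz, maximumFrequency = 5000 Hz:
	analysisWidth     = 3.0 / 60            = 0.05 s
	windowDuration    = 2 * 0.05            = 0.1 s
	samplingFrequency = 2 * 5000            = 10000 Hz
	sframe -> nx      ~ 0.1 * 10000         = 1000 samples
	nfft              = 1024                (first power of 2 >= 1000)
	nq                = 1024 / 2 + 1        = 513
	qmax              = 0.5 * 1024 / 10000  = 0.0512 s
	dq                = 0.0512 / 512        = 0.0001 s
*/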
Example #5
Cepstrogram Sound_to_Cepstrogram (Sound me, double analysisWidth, double dt, double maximumFrequency) {
	try {
		double windowDuration = 2 * analysisWidth; /* Gaussian window */
		long nFrames;

		// Convenience: analyse the whole sound into one Cepstrogram_frame
		if (windowDuration > my dx * my nx) {
			windowDuration = my dx * my nx;
		}
		double t1, samplingFrequency = 2 * maximumFrequency;
		autoSound sound = Sound_resample (me, samplingFrequency, 50);
		Sampled_shortTermAnalysis (me, windowDuration, dt, & nFrames, & t1);
		autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
		autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
		double qmin, qmax, dq, q1;
		long nq;
		{ // laziness: find out the proper dimensions
			autoSpectrum spec = Sound_to_Spectrum (sframe.peek(), 1);
			autoCepstrum cepstrum = Spectrum_to_Cepstrum (spec.peek());
			qmin = cepstrum -> xmin; qmax = cepstrum -> xmax; dq = cepstrum -> dx;
			q1 = cepstrum -> x1; nq = cepstrum -> nx;
		}
		autoCepstrogram thee = Cepstrogram_create (my xmin, my xmax, nFrames, dt, t1, qmin, qmax, nq, dq, q1);

		autoMelderProgress progress (L"Cepstrogram analysis");

		for (long iframe = 1; iframe <= nFrames; iframe++) {
			double t = Sampled_indexToX (thee.peek(), iframe);
			Sound_into_Sound (sound.peek(), sframe.peek(), t - windowDuration / 2);
			Vector_subtractMean (sframe.peek());
			Sounds_multiply (sframe.peek(), window.peek());
			autoSpectrum spec = Sound_to_Spectrum (sframe.peek(), 1);
			autoCepstrum cepstrum = Spectrum_to_Cepstrum (spec.peek());
			for (long i = 1; i <= nq; i++) {
				thy z[i][iframe] = cepstrum -> z[1][i];
			}
			if ((iframe % 10) == 1) {
				Melder_progress ((double) iframe / nFrames, L"Cepstrogram analysis of frame ",
					Melder_integer (iframe), L" out of ", Melder_integer (nFrames), L".");
			}
		}
		return thee.transfer();
	} catch (MelderError) {
		Melder_throw (me, ": no Cepstrogram created.");
	}
}
Example #6
static autoSound synthesize_pulses_lpc (Manipulation me) {
	try {
		if (! my lpc) {
			if (! my sound) Melder_throw (U"Missing original sound.");
			autoSound sound10k = Sound_resample (my sound.get(), 10000.0, 50);
			my lpc = Sound_to_LPC_burg (sound10k.get(), 20, 0.025, 0.01, 50.0);
		}
		if (! my pulses) Melder_throw (U"Missing pulses analysis.");
		autoSound train = PointProcess_to_Sound_pulseTrain (my pulses.get(), 1.0 / my lpc -> samplingPeriod, 0.7, 0.05, 30);
		train -> dx = my lpc -> samplingPeriod;   // to be exact
		Sound_PointProcess_fillVoiceless (train.get(), my pulses.get());
		autoSound result = LPC_and_Sound_filter (my lpc.get(), train.get(), true);
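		// The de-emphasis below is meant to undo the 50 Hz pre-emphasis that
		// Sound_to_LPC_burg applied above (assuming its last argument is the
		// pre-emphasis frequency, matching the 50.0 passed here).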
		NUMdeemphasize_f (result -> z [1], result -> nx, result -> dx, 50.0);
		Vector_scale (result.get(), 0.99);
		return result;
	} catch (MelderError) {
		Melder_throw (me, U": LPC synthesis not performed.");
	}
}
Example #7
Formant Sound_to_Formant_robust (Sound me, double dt_in, int numberOfPoles, double maximumFrequency,
	double halfdt_window, double preEmphasisFrequency, double safetyMargin, double k, int itermax, double tol, int wantlocation)
{
	double dt = dt_in > 0.0 ? dt_in : halfdt_window / 4.0;
	double nyquist = 0.5 / my dx;
	long predictionOrder = 2 * numberOfPoles;
	try {
		autoSound sound = NULL;
		if (maximumFrequency <= 0.0 || fabs (maximumFrequency / nyquist - 1) < 1.0e-12) {
			sound.reset (Data_copy (me));   // will be modified
		} else {
			sound.reset (Sound_resample (me, maximumFrequency * 2, 50));
		}

		autoLPC lpc = Sound_to_LPC_auto (sound.peek(), predictionOrder, halfdt_window, dt, preEmphasisFrequency);
		autoLPC lpcr = LPC_and_Sound_to_LPC_robust (lpc.peek(), sound.peek(), halfdt_window, preEmphasisFrequency, k, itermax, tol, wantlocation);
		autoFormant thee = LPC_to_Formant (lpcr.peek(), safetyMargin);
		return thee.transfer();
	} catch (MelderError) {
		Melder_throw (me, ": no robust Formant created.");
	}
}
Example #8
/*
	Gain is used as a constant amplitude multiplier within a frame of duration my dx.
	A future alternative: convolve gain with a smoother.
*/
autoSound LPC_and_Sound_filter (LPC me, Sound thee, int useGain) {
	try {
		double xmin = my xmin > thy xmin ? my xmin : thy xmin;
		double xmax = my xmax < thy xmax ? my xmax : thy xmax;
		if (xmin >= xmax) {
			Melder_throw (U"Domains of Sound [", thy xmin, U",", thy xmax, U"] and LPC [",
				my xmin, U",", my xmax, U"] do not overlap.");
		}
		// resample sound if samplings don't match
		autoSound source;
		if (my samplingPeriod != thy dx) {
			source = Sound_resample (thee, 1.0 / my samplingPeriod, 50);
			thee = source.get();   // reference copy; remove at end
		}

		autoSound him = Data_copy (thee);

		double *x = his z[1];
		long ifirst = Sampled_xToHighIndex (thee, xmin);
		long ilast = Sampled_xToLowIndex (thee, xmax);
		for (long i = ifirst; i <= ilast; i++) {
			double t = his x1 + (i - 1) * his dx; /* Sampled_indexToX (him, i) */
			long iFrame = lround ( (t - my x1) / my dx + 1.0); /* Sampled_xToNearestIndex (me, t) */
			if (iFrame < 1) {
				continue;
			}
			if (iFrame > my nx) {
				break;
			}
			double *a = my d_frames[iFrame].a;
			long m = i > my d_frames[iFrame].nCoefficients ? my d_frames[iFrame].nCoefficients : i - 1;
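			// In-place all-pole synthesis: x [i] becomes x [i] - sum (j = 1..m) a [j] * x [i - j],
			// so the copied source acts as the excitation and is overwritten with the filter output.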
			for (long j = 1; j <= m; j++) {
				x[i] -= a[j] * x[i - j];
			}
		}

		// Make samples before first frame and after last frame zero.

		for (long i = 1; i < ifirst; i++) {
			x[i] = 0.0;
		}

		for (long i = ilast + 1; i <= his nx; i++) {
			x[i] = 0.0;
		}
		if (useGain) {
			for (long i = ifirst; i <= ilast; i++) {
				double t = his x1 + (i - 1) * his dx; /* Sampled_indexToX (him, i) */
				double riFrame = (t - my x1) / my dx + 1; /* Sampled_xToIndex (me, t); */
				long iFrame = (long) floor (riFrame);
				double phase = riFrame - iFrame;
				if (iFrame < 0 || iFrame > my nx) {
					x[i] = 0.0;
				} else if (iFrame == 0) {
					x[i] *= sqrt (my d_frames[1].gain) * phase;
				} else if (iFrame == my nx) {
					x[i] *= sqrt (my d_frames[my nx].gain) * (1.0 - phase);
				} else x[i] *=
					    phase * sqrt (my d_frames[iFrame + 1].gain) + (1.0 - phase) * sqrt (my d_frames[iFrame].gain);
			}
		}
		return him;
	} catch (MelderError) {
		Melder_throw (thee, U": not filtered.");
	}
}
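The gain branch above crossfades the square roots of adjacent frame gains linearly; written out as a sketch (r is the fractional frame index computed in the loop):

/*
	r     = (t - x1) / dx + 1 = iFrame + phase
	x[i] *= (1 - phase) * sqrt (gain [iFrame]) + phase * sqrt (gain [iFrame + 1])
*/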
Example #9
PowerCepstrogram Sound_to_PowerCepstrogram_hillenbrand (Sound me, double minimumPitch, double dt) {
	try {
		// minimum analysis window has 3 periods of lowest pitch
		double analysisWidth = 3.0 / minimumPitch;
		if (analysisWidth > my dx * my nx) {
			analysisWidth = my dx * my nx;
		}
		double t1, samplingFrequency = 1 / my dx;
		autoSound thee;
		if (samplingFrequency > 30000) {
			samplingFrequency = samplingFrequency / 2;
			thee.reset (Sound_resample (me, samplingFrequency, 1));
		} else {
			thee.reset (Data_copy (me));
		}
		// pre-emphasis with fixed coefficient 0.9
		for (long i = thy nx; i > 1; i--) {
			thy z[1][i] -= 0.9 * thy z[1][i - 1];
		}
		long nosInWindow = (long) floor (analysisWidth * samplingFrequency), nFrames;
		if (nosInWindow < 8) {
			Melder_throw ("Analysis window too short.");
		}
		Sampled_shortTermAnalysis (thee.peek(), analysisWidth, dt, & nFrames, & t1);
		autoNUMvector<double> hamming (1, nosInWindow);
		for (long i = 1; i <= nosInWindow; i++) {
			hamming[i] = 0.54 - 0.46 * cos (2 * NUMpi * (i - 1) / (nosInWindow - 1));
		}
		long nfft = 8; // minimum possible
		while (nfft < nosInWindow) { nfft *= 2; }
		long nfftdiv2 = nfft / 2;
		autoNUMvector<double> fftbuf (1, nfft); // "complex" array
		autoNUMvector<double> spectrum (1, nfftdiv2 + 1); // +1 needed 
		autoNUMfft_Table fftTable;
		NUMfft_Table_init (&fftTable, nfft); // sound to spectrum
		
		double qmax = 0.5 * nfft / samplingFrequency, dq = qmax / (nfftdiv2 + 1);
		autoPowerCepstrogram him = PowerCepstrogram_create (my xmin, my xmax, nFrames, dt, t1, 0, qmax, nfftdiv2+1, dq, 0);
		
		autoMelderProgress progress (L"Cepstrogram analysis");
		
		for (long iframe = 1; iframe <= nFrames; iframe++) {
			double tbegin = t1 + (iframe - 1) * dt - analysisWidth / 2;
			tbegin = tbegin < thy xmin ? thy xmin : tbegin;
			long istart = Sampled_xToIndex (thee.peek(), tbegin);
			istart = istart < 1 ? 1 : istart;
			long iend = istart + nosInWindow - 1;
			iend = iend > thy nx ? thy nx : iend;
			for (long i = 1; i <= nosInWindow; i++) {
				fftbuf[i] = thy z[1][istart + i - 1] * hamming[i];
			}
			for (long i = nosInWindow + 1; i <= nfft; i++) { 
				fftbuf[i] = 0;
			}
			NUMfft_forward (&fftTable, fftbuf.peek());
			complexfftoutput_to_power (fftbuf.peek(), nfft, spectrum.peek(), true); // log10(|fft|^2)
			// subtract average
			double specmean = spectrum[1];
			for (long i = 2; i <= nfftdiv2 + 1; i++) {
				specmean += spectrum[i];
			}
			specmean /= nfftdiv2 + 1;
			for (long i = 1; i <= nfftdiv2 + 1; i++) {
				spectrum[i] -= specmean;
			}
			/*
			 * Here we diverge from Hillenbrand, who takes the FFT of only half of the spectral values.
			 * He overlooks that the actual spectrum has nfft/2+1 values. Therefore, we take the inverse
			 * transform instead, because this keeps the number of samples a power of 2.
			 * At the same time this yields twice as many numbers in the quefrency domain, i.e. we end up
			 * with nfft/2+1 numbers, while Hillenbrand has only nfft/4!
			 */
			fftbuf[1] = spectrum[1];
			for (long i = 2; i < nfftdiv2 + 1; i++) {
				fftbuf[i+i-2] = spectrum[i];
				fftbuf[i+i-1] = 0;
			}
			fftbuf[nfft] = spectrum[nfftdiv2 + 1];
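			// fftbuf now holds a packed real spectrum: DC, then interleaved (re, im)
			// pairs with im = 0, then the Nyquist term (assuming Praat's NUMfft uses
			// this FFTPACK-style packing), so the backward transform produces a real,
			// even quefrency signal.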
			NUMfft_backward (&fftTable, fftbuf.peek());
			for (long i = 1; i <= nfftdiv2 + 1; i++) {
				his z[i][iframe] = fftbuf[i] * fftbuf[i];
			}
			if ((iframe % 10) == 1) {
				Melder_progress ((double) iframe / nFrames, L"Cepstrogram analysis of frame ",
					 Melder_integer (iframe), L" out of ", Melder_integer (nFrames), L".");
			}
		}
		return him.transfer();
	} catch (MelderError) {
		Melder_throw (me, ": no Cepstrogram created.");
	}
}
Example #10
autoSound SpeechSynthesizer_to_Sound (SpeechSynthesizer me, const char32 *text, autoTextGrid *tg, autoTable *events) {
	try {
		int fsamp = espeak_Initialize (AUDIO_OUTPUT_SYNCHRONOUS, 0, nullptr, // 5000ms
			espeakINITIALIZE_PHONEME_EVENTS|espeakINITIALIZE_PHONEME_IPA);
		if (fsamp == -1) {
			Melder_throw (U"Internal espeak error.");
		}
		int synth_flags = espeakCHARS_WCHAR;
		if (my d_inputTextFormat == SpeechSynthesizer_INPUT_TAGGEDTEXT) {
			synth_flags |= espeakSSML;
		}
		if (my d_inputTextFormat != SpeechSynthesizer_INPUT_TEXTONLY) {
			synth_flags |= espeakPHONEMES;
		}
		option_phoneme_events = espeakINITIALIZE_PHONEME_EVENTS; // extern int option_phoneme_events;
		if (my d_outputPhonemeCoding == SpeechSynthesizer_PHONEMECODINGS_IPA) {
			option_phoneme_events |= espeakINITIALIZE_PHONEME_IPA;
		}

		espeak_SetParameter (espeakRATE, my d_wordsPerMinute, 0);
		espeak_SetParameter (espeakPITCH, my d_pitchAdjustment, 0);
		espeak_SetParameter (espeakRANGE, my d_pitchRange, 0);
		const char32 *voiceLanguageCode = SpeechSynthesizer_getVoiceLanguageCodeFromName (me, my d_voiceLanguageName);
		const char32 *voiceVariantCode = SpeechSynthesizer_getVoiceVariantCodeFromName (me, my d_voiceVariantName);
		espeakdata_SetVoiceByName ((const char *) Melder_peek32to8 (voiceLanguageCode), 
			(const char *) Melder_peek32to8 (voiceVariantCode));

		espeak_SetParameter (espeakWORDGAP, my d_wordgap * 100, 0); // espeak wordgap is in units of 10 ms
		espeak_SetParameter (espeakCAPITALS, 0, 0);
		espeak_SetParameter (espeakPUNCTUATION, espeakPUNCT_NONE, 0);

		espeak_SetSynthCallback (synthCallback);

		my d_events = Table_createWithColumnNames (0, U"time type type-t t-pos length a-pos sample id uniq");

		#ifdef _WIN32
			wchar_t *textW = Melder_peek32toW (text);
			espeak_Synth (textW, wcslen (textW) + 1, 0, POS_CHARACTER, 0, synth_flags, nullptr, me);
		#else
			espeak_Synth (text, str32len (text) + 1, 0, POS_CHARACTER, 0, synth_flags, nullptr, me);
		#endif

		espeak_Terminate ();
		autoSound thee = buffer_to_Sound (my d_wav, my d_numberOfSamples, my d_internalSamplingFrequency);

		if (my d_samplingFrequency != my d_internalSamplingFrequency) {
			thee = Sound_resample (thee.get(), my d_samplingFrequency, 50);
		}
		my d_numberOfSamples = 0; // re-use the wav-buffer
		if (tg) {
			double xmin = Table_getNumericValue_Assert (my d_events.get(), 1, 1);
			if (xmin > thy xmin) {
				xmin = thy xmin;
			}
			double xmax = Table_getNumericValue_Assert (my d_events.get(), my d_events -> rows.size, 1);
			if (xmax < thy xmax) {
				xmax = thy xmax;
			}
			autoTextGrid tg1 = Table_to_TextGrid (my d_events.get(), text, xmin, xmax);
			*tg = TextGrid_extractPart (tg1.get(), thy xmin, thy xmax, 0);
		}
		if (events) {
			Table_setEventTypeString (my d_events.get());
			*events = my d_events.move();
		}
		my d_events.reset();
		return thee;
	} catch (MelderError) {
		espeak_Terminate ();
		Melder_throw (U"Text not played.");
	}
}
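A hedged usage sketch (assuming `synthesizer` is an existing SpeechSynthesizer; both optional outputs are requested here):

static autoSound demo_synthesize (SpeechSynthesizer synthesizer) {
	autoTextGrid tg;
	autoTable events;
	// Returns the sound; the alignment TextGrid and the event Table come back through the pointers.
	autoSound sound = SpeechSynthesizer_to_Sound (synthesizer, U"Hello world", & tg, & events);
	return sound;
}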
Example #11
Pitch Sound_to_Pitch_shs (Sound me, double timeStep, double minimumPitch,
                          double maximumFrequency, double ceiling, long maxnSubharmonics, long maxnCandidates,
                          double compressionFactor, long nPointsPerOctave) {
	try {
		double firstTime, newSamplingFrequency = 2 * maximumFrequency;
		double windowDuration = 2 / minimumPitch, halfWindow = windowDuration / 2;
		double atans = nPointsPerOctave * NUMlog2 (65.0 / 50.0) - 1;
		// Number of speech samples in the downsampled signal in each frame:
		// 100 for windowDuration == 0.04 and newSamplingFrequency == 2500
		long nx = lround (windowDuration * newSamplingFrequency);

		// The minimum number of points for the fft is 256.
		long nfft = 1;
		while ( (nfft *= 2) < nx || nfft <= 128) {
			;
		}
		long nfft2 = nfft / 2 + 1;
		double frameDuration = nfft / newSamplingFrequency;
		double df = newSamplingFrequency / nfft;

		// The number of points on the octave scale

		double fminl2 = NUMlog2 (minimumPitch), fmaxl2 = NUMlog2 (maximumFrequency);
		long nFrequencyPoints = (long) floor ((fmaxl2 - fminl2) * nPointsPerOctave);
		double dfl2 = (fmaxl2 - fminl2) / (nFrequencyPoints - 1);

		autoSound sound = Sound_resample (me, newSamplingFrequency, 50);
		long numberOfFrames;
		Sampled_shortTermAnalysis (sound.peek(), windowDuration, timeStep, &numberOfFrames, &firstTime);
		autoSound frame = Sound_createSimple (1, frameDuration, newSamplingFrequency);
		autoSound hamming = Sound_createHamming (nx / newSamplingFrequency, newSamplingFrequency);
		autoPitch thee = Pitch_create (my xmin, my xmax, numberOfFrames, timeStep, firstTime,
		                               ceiling, maxnCandidates);
		autoNUMvector<double> cc (1, numberOfFrames);
		autoNUMvector<double> specAmp (1, nfft2);
		autoNUMvector<double> fl2 (1, nfft2);
		autoNUMvector<double> yv2 (1, nfft2);
		autoNUMvector<double> arctg (1, nFrequencyPoints);
		autoNUMvector<double> al2 (1, nFrequencyPoints);

		Melder_assert (frame->nx >= nx);
		Melder_assert (hamming->nx == nx);

		// Compute the absolute value of the globally largest amplitude w.r.t. the global mean.

		double globalMean, globalPeak;
		Sound_localMean (sound.peek(), sound -> xmin, sound -> xmax, &globalMean);
		Sound_localPeak (sound.peek(), sound -> xmin, sound -> xmax, globalMean, &globalPeak);

		/*
			For the cubic spline interpolation we need the frequencies on an octave
			scale, i.e., a log2 scale. All frequencies must be DIFFERENT, otherwise
			the cubic spline interpolation will give corrupt results.
			Because log2(f==0) is not defined, we use the heuristic: f[2]-f[1] == f[3]-f[2].
		*/

		for (long i = 2; i <= nfft2; i++) {
			fl2[i] = NUMlog2 ( (i - 1) * df);
		}
		fl2[1] = 2 * fl2[2] - fl2[3];

		// Calculate frequencies regularly spaced on a log2-scale and
		// the frequency weighting function.

		for (long i = 1; i <= nFrequencyPoints; i++) {
			arctg[i] = 0.5 + atan (3 * (i - atans) / nPointsPerOctave) / NUMpi;
		}

		// Perform the analysis on all frames.

		for (long i = 1; i <= numberOfFrames; i++) {
			Pitch_Frame pitchFrame = &thy frame[i];
			double hm = 1, f0, pitch_strength, localMean, localPeak;
			double tmid = Sampled_indexToX (thee.peek(), i); /* The center of this frame */
			long nx_tmp = frame -> nx;

			// Copy a frame from the sound, apply a hamming window. Get local 'intensity'


			frame -> nx = nx; /* begin of dirty trick */
			Sound_into_Sound (sound.peek(), frame.peek(), tmid - halfWindow);
			Sounds_multiply (frame.peek(), hamming.peek());
			Sound_localMean (sound.peek(), tmid - 3 * halfWindow, tmid + 3 * halfWindow, &localMean);
			Sound_localPeak (sound.peek(), tmid - halfWindow, tmid + halfWindow, localMean, &localPeak);
			pitchFrame -> intensity = localPeak > globalPeak ? 1 : localPeak / globalPeak;
			frame -> nx = nx_tmp; /* end of dirty trick */

			// Get the Fourier spectrum.

			autoSpectrum spec = Sound_to_Spectrum (frame.peek(), 1);
			Melder_assert (spec->nx == nfft2);

			// From complex spectrum to amplitude spectrum.

			for (long j = 1; j <= nfft2; j++) {
				double rs = spec -> z[1][j], is = spec -> z[2][j];
				specAmp[j] = sqrt (rs * rs + is * is);
			}

			// Enhance the peaks in the spectrum.

			spec_enhance_SHS (specAmp.peek(), nfft2);

			// Smooth the enhanced spectrum.

			spec_smoooth_SHS (specAmp.peek(), nfft2);

			// Go to a logarithmic scale and perform cubic spline interpolation to get
			// spectral values for the increased number of frequency points.

			NUMspline (fl2.peek(), specAmp.peek(), nfft2, 1e30, 1e30, yv2.peek());
			for (long j = 1; j <= nFrequencyPoints; j++) {
				double f = fminl2 + (j - 1) * dfl2;
				NUMsplint (fl2.peek(), specAmp.peek(), yv2.peek(), nfft2, f, &al2[j]);
			}

			// Multiply by frequency selectivity of the auditory system.

			for (long j = 1; j <= nFrequencyPoints; j++) {
				al2[j] = al2[j] > 0 ? al2[j] * arctg[j] : 0;
			}

			// The subharmonic summation. Shift spectra in octaves and sum.

			Pitch_Frame_init (pitchFrame, maxnCandidates);
			autoNUMvector<double> sumspec (1, nFrequencyPoints);
			pitchFrame -> nCandidates = 0; /* !!!!! */

			for (long m = 1; m <= maxnSubharmonics + 1; m++) {
				long kb = 1 + (long) floor (nPointsPerOctave * NUMlog2 (m));
				for (long k = kb; k <= nFrequencyPoints; k++) {
					sumspec[k - kb + 1] += al2[k] * hm;
				}
				hm *= compressionFactor;
			}

			// First register the voiceless candidate (always present).

			Pitch_Frame_addPitch (pitchFrame, 0, 0, maxnCandidates);

			/*
				Get the best local estimates for the pitch as the maxima of the
				subharmonic sum spectrum by parabolic interpolation on three points.
				The formula for a parabola with a maximum is:
					y(x) = a - b (x - c)^2, with b >= 0.
				The three points are (-dx, y1), (0, y2) and (dx, y3).
				The solutions for a (the maximum) and c (the position) are:
					a = (2 y1 (4 y2 + y3) - y1^2 - (y3 - 4 y2)^2) / (8 (y1 - 2 y2 + y3))
					c = dx (y1 - y3) / (2 (y1 - 2 y2 + y3))
					(b = (2 y2 - y1 - y3) / (2 dx^2))
			*/
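			/*
				Sanity check of the formula for a (a sketch, not from the original):
				the textbook vertex value y2 - (y1 - y3)^2 / (8 (y1 - 2 y2 + y3)),
				put over the common denominator 8 (y1 - 2 y2 + y3), has numerator
					8 y2 (y1 - 2 y2 + y3) - (y1 - y3)^2
						= 2 y1 (4 y2 + y3) - y1^2 - (y3 - 4 y2)^2,
				which is exactly the expression computed as strength in the loop below.
			*/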

			for (long k = 2; k <= nFrequencyPoints - 1; k++) {
				double y1 = sumspec[k - 1], y2 = sumspec[k], y3 = sumspec[k + 1];
				if (y2 > y1 && y2 >= y3) {
					double denum = y1 - 2 * y2 + y3, tmp = y3 - 4 * y2;
					double x =  dfl2 * (y1 - y3) / (2 * denum);
					double f = pow (2, fminl2 + (k - 1) * dfl2 + x);
					double strength = (2 * y1 * (4 * y2 + y3) - y1 * y1 - tmp * tmp) / (8 * denum);
					Pitch_Frame_addPitch (pitchFrame, f, strength, maxnCandidates);
				}
			}

			/*
				Check whether f0 corresponds to an actual periodicity T = 1 / f0:
				correlate two signal periods of duration T, one starting at the
				middle of the interval and one starting T seconds before.
				If there is periodicity the correlation coefficient should be high.

				However, some sounds do not show any regularity, or very low
				frequency and regularity, and nevertheless have a definite
				pitch, e.g. Shepard sounds.
			*/

			Pitch_Frame_getPitch (pitchFrame, &f0, &pitch_strength);
			if (f0 > 0) {
				cc[i] = Sound_correlateParts (sound.peek(), tmid - 1.0 / f0, tmid, 1.0 / f0);
			}
		}

		// Base V/UV decision on correlation coefficients.
		// Resize the pitch strengths w.r.t. the cc.

		double vuvCriterium = 0.52;
		for (long i = 1; i <= numberOfFrames; i++) {
			Pitch_Frame_resizeStrengths (& thy frame[i], cc[i], vuvCriterium);
		}
		return thee.transfer();
	} catch (MelderError) {
		Melder_throw (me, U": no Pitch (shs) created.");
	}
}
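A hedged calling sketch for the whole routine (all parameter values below are illustrative assumptions, not requirements):

static Pitch demo_pitch_shs (Sound snd) {
	// 10 ms time step, 50 Hz pitch floor, spectrum analysed up to 1250 Hz,
	// 500 Hz ceiling, 15 subharmonics, 15 candidates,
	// compression factor 0.84, 48 points per octave.
	return Sound_to_Pitch_shs (snd, 0.01, 50.0, 1250.0, 500.0, 15, 15, 0.84, 48);
}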