static autoSpectrum Cepstrum_to_Spectrum2 (Cepstrum me) { //TODO power cepstrum
	try {
		autoNUMfft_Table fftTable;
		long numberOfSamples = 2 * my nx - 2;

		autoNUMvector<double> fftbuf (1, numberOfSamples);
		autoSpectrum thee = Spectrum_create (0.5 / my dx, my nx);
		fftbuf[1] = sqrt (my z[1][1]);
		for (long i = 2; i <= my nx; i++) {
			fftbuf[i] = 2.0 * sqrt (my z[1][i]);
		}
		// fftbuf[my nx+1 ... numberOfSamples] = 0
		NUMfft_Table_init (&fftTable, numberOfSamples);
		NUMfft_forward (&fftTable, fftbuf.peek());
		
		thy z[1][1] = fabs (fftbuf[1]);
		for (long i = 2; i < my nx; i++) {
			double br = fftbuf[i + i - 2], bi = fftbuf[i + i - 1];
			thy z[1][i] = sqrt (br * br + bi * bi);
		}
		thy z[1][my nx] = fabs (fftbuf[numberOfSamples]);
		for (long i = 1; i <= my nx; i++) {
			thy z[1][i] = exp (NUMln10 * thy z[1][i] / 20.0) * 2e-5 / sqrt (2 * thy dx);
			thy z[2][i] = 0.0;
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no Spectrum created.");
	}
}
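// A minimal sketch of the dB <-> amplitude-density conversion used at the end of
// Cepstrum_to_Spectrum2 above: NUMln10 is ln 10, so exp (NUMln10 * x / 20) equals
// 10^(x/20). Assumes the 2e-5 Pa reference and the 1/sqrt(2*df) density factor from
// that snippet; the function names below are illustrative only.
#include <cmath>

static double dB_to_amplitudeDensity (double dB, double df) {
	return std::exp (std::log (10.0) * dB / 20.0) * 2e-5 / std::sqrt (2.0 * df);
}

static double amplitudeDensity_to_dB (double amplitude, double df) {
	return 20.0 * std::log10 (amplitude * std::sqrt (2.0 * df) / 2e-5);
}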
Cepstrum Spectrum_to_Cepstrum_hillenbrand (Spectrum me) {
	try {
		autoNUMfft_Table fftTable;
		// originalNumberOfSamplesProbablyOdd irrelevant
		if (my x1 != 0.0) {
			Melder_throw ("A Fourier-transformable Spectrum must have a first frequency of 0 Hz, not ", my x1, L" Hz.");
		}
		long numberOfSamples = my nx - 1;
		autoCepstrum thee = Cepstrum_create (0.5 / my dx, my nx);
		NUMfft_Table_init (&fftTable, my nx);
		autoNUMvector<double> amp (1, my nx);
		
		for (long i = 1; i <= my nx; i++) {
			amp [i] = my v_getValueAtSample (i, 0, 2);
		}
		NUMfft_forward (&fftTable, amp.peek());
		
		for (long i = 1; i <= my nx; i++) {
			double val = amp[i] / numberOfSamples;// scaling 1/n because ifft(fft(1))= n;
			thy z[1][i] = val * val; // power cepstrum
		}
		return thee.transfer();
	} catch (MelderError) {
		Melder_throw (me, ": not converted to Sound.");
	}
}
static autoCepstrum Spectrum_to_Cepstrum2 (Spectrum me) {
	try {
		autoNUMfft_Table fftTable;
		// originalNumberOfSamplesProbablyOdd irrelevant
		if (my x1 != 0.0) {
			Melder_throw (U"A Fourier-transformable Spectrum must have a first frequency of 0 Hz, not ", my x1, U" Hz.");
		}
		long numberOfSamples = 2 * my nx - 2;
		autoCepstrum thee = Cepstrum_create (0.5 / my dx, my nx);
		// my dx = 1 / (dT * N) = 1 / (duration of sound)
		thy dx = 1 / (my dx * numberOfSamples); // Cepstrum is on [-T/2, T/2] !
		NUMfft_Table_init (&fftTable, numberOfSamples);
		autoNUMvector<double> fftbuf (1, numberOfSamples);

		fftbuf[1] = my v_getValueAtSample (1, 0, 2);
		for (long i = 2; i < my nx; i++) {
			fftbuf [i + i - 2] = my v_getValueAtSample (i, 0, 2);
			fftbuf [i + i - 1] = 0.0;
		}
		fftbuf [numberOfSamples] = my v_getValueAtSample (my nx, 0, 2);
		NUMfft_backward (&fftTable, fftbuf.peek());
		for (long i = 1; i <= my nx; i++) {
			double val = fftbuf[i] / numberOfSamples; // scaling 1/n because ifft(fft(1))= n;
			thy z[1][i] = val * val; // power cepstrum
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": not converted to Cepstrum.");
	}
}
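// A worked check of the quefrency step set in Spectrum_to_Cepstrum2 above, assuming the
// Spectrum came from a sound with sampling period dT and N = 2 * nx - 2 time samples:
//   my dx (frequency step)  = 1 / (dT * N)
//   thy dx (quefrency step) = 1 / (my dx * N) = dT,
// i.e. the cepstrum is sampled at the original sampling period. 'quefrencyStep' is an
// illustrative name, not part of the snippets above.
static double quefrencyStep (double frequencyStep, long nx) {
	long numberOfSamples = 2 * nx - 2;
	return 1.0 / (frequencyStep * numberOfSamples);
}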
Example #4
void NUMforwardRealFastFourierTransform (double *data, long n) {
	autoNUMfft_Table table;
	NUMfft_Table_init (& table, n);
	NUMfft_forward (& table, data);

	if (n > 1) {
		// To be compatible with old behaviour
		double tmp = data[n];
		for (long i = n; i > 2; i--) {
			data[i] = data[i - 1];
		}
		data[2] = tmp;
	}
}
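// A minimal sketch of reading complex bin k (0-based, 0 = DC) from the layout produced by
// NUMforwardRealFastFourierTransform above, assuming an even n and the same 1-based arrays
// as in these snippets: the shuffle moves the Nyquist value from data [n] to data [2], so
// the resulting layout is [DC, Nyquist, re1, im1, re2, im2, ...]. 'Bin' and 'getBin' are
// illustrative names, not Praat calls.
struct Bin { double re, im; };

static Bin getBin (const double *data, long n, long k) {
	if (k == 0) return { data [1], 0.0 };            // DC component
	if (2 * k == n) return { data [2], 0.0 };        // Nyquist, moved to slot 2 by the wrapper
	return { data [2 * k + 1], data [2 * k + 2] };   // interleaved re/im pairs
}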
Example #5
void NUMreverseRealFastFourierTransform (double *data, long n) {
	autoNUMfft_Table table;

	if (n > 1) {
		// To be compatible with old behaviour
		double tmp = data[2];
		for (long i = 2; i < n; i++) {
			data[i] = data[i + 1];
		}
		data[n] = tmp;
	}

	NUMfft_Table_init (& table, n);
	NUMfft_backward (& table, data);
}
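// A hypothetical round-trip check, assuming the two wrappers above are linked in: the
// reverse transform undoes the reordering, and, per the "ifft (fft (1)) = n" comments in
// the cepstrum code in this listing, the forward/backward pair scales the data by n.
static void realFFTRoundTrip (double *data /* 1-based: data [1..n] */, long n) {
	NUMforwardRealFastFourierTransform (data, n);
	NUMreverseRealFastFourierTransform (data, n);
	for (long i = 1; i <= n; i ++)
		data [i] /= n;   // undo the factor n to recover the original samples
}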
Example #6
autoSpectrum Sound_to_Spectrum (Sound me, int fast) {
	try {
		long numberOfSamples = my nx;
		if (fast) {
			numberOfSamples = 2;
			while (numberOfSamples < my nx) numberOfSamples *= 2;
		}
		long numberOfFrequencies = numberOfSamples / 2 + 1;   // 4 samples -> cos0 cos1 sin1 cos2; 5 samples -> cos0 cos1 sin1 cos2 sin2
		autoNUMvector <double> data (1, numberOfSamples);
		autoNUMfft_Table fourierTable;
		NUMfft_Table_init (& fourierTable, numberOfSamples);

		for (long i = 1; i <= my nx; i ++)
			data [i] = my ny == 1 ? my z [1] [i] : 0.5 * (my z [1] [i] + my z [2] [i]);
		NUMfft_forward (& fourierTable, data.peek());
		autoSpectrum thee = Spectrum_create (0.5 / my dx, numberOfFrequencies);
		thy dx = 1.0 / (my dx * numberOfSamples);   // override
		double *re = thy z [1];
		double *im = thy z [2];
		double scaling = my dx;
		re [1] = data [1] * scaling;
		im [1] = 0.0;
		for (long i = 2; i < numberOfFrequencies; i ++) {
			re [i] = data [i + i - 2] * scaling;
			im [i] = data [i + i - 1] * scaling;
		}
		if ((numberOfSamples & 1) != 0) {
			if (numberOfSamples > 1) {
				re [numberOfFrequencies] = data [numberOfSamples - 1] * scaling;
				im [numberOfFrequencies] = data [numberOfSamples] * scaling;
			}
		} else {
			re [numberOfFrequencies] = data [numberOfSamples] * scaling;
			im [numberOfFrequencies] = 0.0;
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": not converted to Spectrum.");
	}
}
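// A minimal helper matching the frequency axis set up in Sound_to_Spectrum above
// (thy dx = 1 / (my dx * numberOfSamples), first bin at 0 Hz, as the x1 == 0.0 checks
// elsewhere in this listing assume). 'binFrequency' is an illustrative name, not a Praat call.
static double binFrequency (long bin /* 1-based */, double samplingPeriod, long numberOfSamples) {
	double df = 1.0 / (samplingPeriod * numberOfSamples);
	return (bin - 1) * df;
}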
autoSpectrogram Sound_to_Spectrogram (Sound me, double effectiveAnalysisWidth, double fmax,
	double minimumTimeStep1, double minimumFreqStep1, enum kSound_to_Spectrogram_windowShape windowType,
	double maximumTimeOversampling, double maximumFreqOversampling)
{
	try {
		double nyquist = 0.5 / my dx;
		double physicalAnalysisWidth =
			windowType == kSound_to_Spectrogram_windowShape_GAUSSIAN ? 2 * effectiveAnalysisWidth : effectiveAnalysisWidth;
		double effectiveTimeWidth = effectiveAnalysisWidth / sqrt (NUMpi);
		double effectiveFreqWidth = 1 / effectiveTimeWidth;
		double minimumTimeStep2 = effectiveTimeWidth / maximumTimeOversampling;
		double minimumFreqStep2 = effectiveFreqWidth / maximumFreqOversampling;
		double timeStep = minimumTimeStep1 > minimumTimeStep2 ? minimumTimeStep1 : minimumTimeStep2;
		double freqStep = minimumFreqStep1 > minimumFreqStep2 ? minimumFreqStep1 : minimumFreqStep2;
		double duration = my dx * (double) my nx, windowssq = 0.0;

		/*
		 * Compute the time sampling.
		 */
		long nsamp_window = (long) floor (physicalAnalysisWidth / my dx);
		long halfnsamp_window = nsamp_window / 2 - 1;
		nsamp_window = halfnsamp_window * 2;
		if (nsamp_window < 1)
			Melder_throw (U"Your analysis window is too short: less than two samples.");
		if (physicalAnalysisWidth > duration)
			Melder_throw (U"Your sound is too short:\n"
				U"it should be at least as long as ",
				windowType == kSound_to_Spectrogram_windowShape_GAUSSIAN ? U"two window lengths." : U"one window length.");
		long numberOfTimes = 1 + (long) floor ((duration - physicalAnalysisWidth) / timeStep);   // >= 1
		double t1 = my x1 + 0.5 * ((double) (my nx - 1) * my dx - (double) (numberOfTimes - 1) * timeStep);
			/* Centre of first frame. */

		/*
		 * Compute the frequency sampling of the FFT spectrum.
		 */
		if (fmax <= 0.0 || fmax > nyquist) fmax = nyquist;
		long numberOfFreqs = (long) floor (fmax / freqStep);
		if (numberOfFreqs < 1) return autoSpectrogram ();
		long nsampFFT = 1;
		while (nsampFFT < nsamp_window || nsampFFT < 2 * numberOfFreqs * (nyquist / fmax))
			nsampFFT *= 2;
		long half_nsampFFT = nsampFFT / 2;

		/*
		 * Compute the frequency sampling of the spectrogram.
		 */
		long binWidth_samples = (long) floor (freqStep * my dx * nsampFFT);
		if (binWidth_samples < 1) binWidth_samples = 1;
		double binWidth_hertz = 1.0 / (my dx * nsampFFT);
		freqStep = binWidth_samples * binWidth_hertz;
		numberOfFreqs = (long) floor (fmax / freqStep);
		if (numberOfFreqs < 1) return autoSpectrogram ();

		autoSpectrogram thee = Spectrogram_create (my xmin, my xmax, numberOfTimes, timeStep, t1,
				0.0, fmax, numberOfFreqs, freqStep, 0.5 * (freqStep - binWidth_hertz));

		autoNUMvector <double> frame (1, nsampFFT);
		autoNUMvector <double> spec (1, nsampFFT);
		autoNUMvector <double> window (1, nsamp_window);
		autoNUMfft_Table fftTable;
		NUMfft_Table_init (& fftTable, nsampFFT);

		autoMelderProgress progress (U"Sound to Spectrogram...");
		for (long i = 1; i <= nsamp_window; i ++) {
			double nSamplesPerWindow_f = physicalAnalysisWidth / my dx;
			double phase = (double) i / nSamplesPerWindow_f;   // 0 .. 1
			double value;
			switch (windowType) {
				case kSound_to_Spectrogram_windowShape_SQUARE:
					value = 1.0;
				break; case kSound_to_Spectrogram_windowShape_HAMMING:
					value = 0.54 - 0.46 * cos (2.0 * NUMpi * phase);
				break; case kSound_to_Spectrogram_windowShape_BARTLETT:
					value = 1.0 - fabs ((2.0 * phase - 1.0));
				break; case kSound_to_Spectrogram_windowShape_WELCH:
					value = 1.0 - (2.0 * phase - 1.0) * (2.0 * phase - 1.0);
				break; case kSound_to_Spectrogram_windowShape_HANNING:
					value = 0.5 * (1.0 - cos (2.0 * NUMpi * phase));
				break; case kSound_to_Spectrogram_windowShape_GAUSSIAN:
				{
					double imid = 0.5 * (double) (nsamp_window + 1), edge = exp (-12.0);
					phase = ((double) i - imid) / nSamplesPerWindow_f;   /* -0.5 .. +0.5 */
					value = (exp (-48.0 * phase * phase) - edge) / (1.0 - edge);
				}
				break; default:
					value = 1.0;
			}
			window [i] = value;
			windowssq += value * value;
		}
		double oneByBinWidth = 1.0 / windowssq / binWidth_samples;

		for (long iframe = 1; iframe <= numberOfTimes; iframe ++) {
			double t = Sampled_indexToX (thee.peek(), iframe);
			long leftSample = Sampled_xToLowIndex (me, t), rightSample = leftSample + 1;
			long startSample = rightSample - halfnsamp_window;
			long endSample = leftSample + halfnsamp_window;
			Melder_assert (startSample >= 1);
			Melder_assert (endSample <= my nx);
			for (long i = 1; i <= half_nsampFFT; i ++) {
				spec [i] = 0.0;
			}
			for (long channel = 1; channel <= my ny; channel ++) {
				for (long j = 1, i = startSample; j <= nsamp_window; j ++) {
					frame [j] = my z [channel] [i ++] * window [j];
				}
				for (long j = nsamp_window + 1; j <= nsampFFT; j ++) frame [j] = 0.0f;

				Melder_progress (iframe / (numberOfTimes + 1.0),
					U"Sound to Spectrogram: analysis of frame ", iframe, U" out of ", numberOfTimes);

				/* Compute Fast Fourier Transform of the frame. */

				NUMfft_forward (& fftTable, frame.peek());   // complex spectrum

				/* Put power spectrum in frame [1..half_nsampFFT + 1]. */

				spec [1] += frame [1] * frame [1];   // DC component
				for (long i = 2; i <= half_nsampFFT; i ++)
					spec [i] += frame [i + i - 2] * frame [i + i - 2] + frame [i + i - 1] * frame [i + i - 1];
				spec [half_nsampFFT + 1] += frame [nsampFFT] * frame [nsampFFT];   // Nyquist frequency. Correct??
			}
			if (my ny > 1 ) for (long i = 1; i <= half_nsampFFT; i ++) {
				spec [i] /= my ny;
			}

			/* Bin into frame [1..nBands]. */
			for (long iband = 1; iband <= numberOfFreqs; iband ++) {
				long leftsample = (iband - 1) * binWidth_samples + 1, rightsample = leftsample + binWidth_samples;
				double power = 0.0;
				for (long i = leftsample; i < rightsample; i ++) power += spec [i];
				thy z [iband] [iframe] = power * oneByBinWidth;
			}
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": spectrogram analysis not performed.");
	}
}
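// A minimal standalone version of the symmetric frame layout computed in Sound_to_Spectrogram
// above (numberOfTimes and t1, the centre of the first frame); the parameter names are
// illustrative, the formulas are the same as in the function.
#include <cmath>

static void shortTermFrameLayout (double duration, double physicalAnalysisWidth, double timeStep,
	double x1, double dx, long nx, long *numberOfTimes, double *t1)
{
	*numberOfTimes = 1 + (long) std::floor ((duration - physicalAnalysisWidth) / timeStep);   // >= 1
	// centre the sequence of frame centres within the sampled time domain
	*t1 = x1 + 0.5 * ((double) (nx - 1) * dx - (double) (*numberOfTimes - 1) * timeStep);
}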
PowerCepstrogram Sound_to_PowerCepstrogram_hillenbrand (Sound me, double minimumPitch, double dt) {
	try {
		// minimum analysis window has 3 periods of lowest pitch
		double analysisWidth = 3.0 / minimumPitch;
		if (analysisWidth > my dx * my nx) {
			analysisWidth = my dx * my nx;
		}
		double t1, samplingFrequency = 1 / my dx;
		autoSound thee;
		if (samplingFrequency > 30000) {
			samplingFrequency = samplingFrequency / 2;
			thee.reset (Sound_resample (me, samplingFrequency, 1));
		} else {
			thee.reset (Data_copy (me));
		}
		// pre-emphasis with fixed coefficient 0.9
		for (long i = thy nx; i > 1; i--) {
			thy z[1][i] -= 0.9 * thy z[1][i - 1];
		}
		long nosInWindow = (long) floor (analysisWidth * samplingFrequency), nFrames;
		if (nosInWindow < 8) {
			Melder_throw ("Analysis window too short.");
		}
		Sampled_shortTermAnalysis (thee.peek(), analysisWidth, dt, & nFrames, & t1);
		autoNUMvector<double> hamming (1, nosInWindow);
		for (long i = 1; i <= nosInWindow; i++) {
			hamming[i] = 0.54 - 0.46 * cos (2 * NUMpi * (i - 1) / (nosInWindow - 1));
		}
		long nfft = 8; // minimum possible
		while (nfft < nosInWindow) { nfft *= 2; }
		long nfftdiv2 = nfft / 2;
		autoNUMvector<double> fftbuf (1, nfft); // "complex" array
		autoNUMvector<double> spectrum (1, nfftdiv2 + 1); // +1 needed 
		autoNUMfft_Table fftTable;
		NUMfft_Table_init (&fftTable, nfft); // sound to spectrum
		
		double qmax = 0.5 * nfft / samplingFrequency, dq = qmax / (nfftdiv2 + 1);
		autoPowerCepstrogram him = PowerCepstrogram_create (my xmin, my xmax, nFrames, dt, t1, 0, qmax, nfftdiv2+1, dq, 0);
		
		autoMelderProgress progress (L"Cepstrogram analysis");
		
		for (long iframe = 1; iframe <= nFrames; iframe++) {
			double tbegin = t1 + (iframe - 1) * dt - analysisWidth / 2;
			tbegin = tbegin < thy xmin ? thy xmin : tbegin;
			long istart = Sampled_xToIndex (thee.peek(), tbegin);
			istart = istart < 1 ? 1 : istart;
			long iend = istart + nosInWindow - 1;
			iend = iend > thy nx ? thy nx : iend;
			for (long i = 1; i <= nosInWindow; i++) {
				fftbuf[i] = thy z[1][istart + i - 1] * hamming[i];
			}
			for (long i = nosInWindow + 1; i <= nfft; i++) { 
				fftbuf[i] = 0;
			}
			NUMfft_forward (&fftTable, fftbuf.peek());
			complexfftoutput_to_power (fftbuf.peek(), nfft, spectrum.peek(), true); // log10(|fft|^2)
			// subtract average
			double specmean = spectrum[1];
			for (long i = 2; i <= nfftdiv2 + 1; i++) {
				specmean += spectrum[i];
			}
			specmean /= nfftdiv2 + 1;
			for (long i = 1; i <= nfftdiv2 + 1; i++) {
				spectrum[i] -= specmean;
			}
			/*
			 * Here we diverge from Hillenbrand, who takes the fft of only half of the spectral values.
			 * He forgets that the actual spectrum has nfft/2+1 values. Therefore we take the inverse
			 * transform instead, because this keeps the number of samples a power of 2.
			 * At the same time this yields twice as many numbers in the quefrency domain, i.e. we end up
			 * with nfft/2+1 numbers, while Hillenbrand has only nfft/4.
			 */
			fftbuf[1] = spectrum[1];
			for (long i = 2; i < nfftdiv2 + 1; i++) {
				fftbuf[i+i-2] = spectrum[i];
				fftbuf[i+i-1] = 0;
			}
			fftbuf[nfft] = spectrum[nfftdiv2 + 1];
			NUMfft_backward (&fftTable, fftbuf.peek());
			for (long i = 1; i <= nfftdiv2 + 1; i++) {
				his z[i][iframe] = fftbuf[i] * fftbuf[i];
			}
			if ((iframe % 10) == 1) {
				Melder_progress ((double) iframe / nFrames, L"Cepstrogram analysis of frame ",
					 Melder_integer (iframe), L" out of ", Melder_integer (nFrames), L".");
			}
		}
		return him.transfer();
	} catch (MelderError) {
		Melder_throw (me, ": no Cepstrogram created.");
	}
}
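// A minimal sketch using the quefrency axis set up above (qmax = 0.5 * nfft / fs,
// dq = qmax / (nfft/2 + 1), first bin at 0 s): a cepstral peak at quefrency q corresponds
// to a pitch estimate of 1 / q. 'quefrencyBinToPitch' is an illustrative name.
static double quefrencyBinToPitch (long bin /* 1-based */, long nfft, double samplingFrequency) {
	double qmax = 0.5 * nfft / samplingFrequency;
	double dq = qmax / (nfft / 2 + 1);
	double quefrency = (bin - 1) * dq;
	return quefrency > 0.0 ? 1.0 / quefrency : 0.0;   // bin 1 lies at 0 s and carries no pitch
}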
Pitch Sound_to_Pitch_any (Sound me, double dt,       /* time step strategy related */
                          double minimumPitch,        /* pitch settings related */
                          double periodsPerWindow,    /* kTimeSoundAnalysisEditor_pitch_analysisMethod related */
                          int maxnCandidates,
                          int method,                 /* method related */
                          double silenceThreshold, double voicingThreshold, double octaveCost, double octaveJumpCost,
                          double voicedUnvoicedCost, double ceiling)
{
	  NUMfft_Table fftTable = NUMfft_Table_create();
	  double duration, t1;
	  double dt_window;                       /* Window length in seconds. */
	  long nsamp_window, halfnsamp_window;   /* Number of samples per window. */
	  long nFrames, minimumLag, maximumLag;
	  long iframe, nsampFFT;
	  double interpolation_depth;
	  long nsamp_period, halfnsamp_period;   /* Number of samples in longest period. */
	  long brent_ixmax, brent_depth;
	  double brent_accuracy;                 /* Obsolete. */
	  double globalPeak;
	  
	   if (maxnCandidates < 2 || method < AC_HANNING || method > FCC_ACCURATE)
	   {
	       std::cout<<"Error: maxnCandidates: "<<maxnCandidates<<" method: "<<method<<"."<<std::endl;
		   std::cout<<"Sound_to_Pitch.cpp: Line 13. 69"<<std::endl;
		   return NULL;
	   }
	  
	   if (maxnCandidates < ceiling / minimumPitch) maxnCandidates = ceiling / minimumPitch;
 
	   if (dt <= 0.0) dt = periodsPerWindow / minimumPitch / 4.0;  /* e.g. 3 periods, 75 Hz: 10 milliseconds. */

		switch (method) {
			case AC_HANNING:
				brent_depth = NUM_PEAK_INTERPOLATE_SINC70;
				brent_accuracy = 1e-7;
				interpolation_depth = 0.5;
				break;
			case AC_GAUSS:
				periodsPerWindow *= 2;       /* Because Gaussian window is twice as long. */
				brent_depth = NUM_PEAK_INTERPOLATE_SINC700;
				brent_accuracy = 1e-11;
				interpolation_depth = 0.25;   /* Because Gaussian window is twice as long. */
				break;
			case FCC_NORMAL:
				brent_depth = NUM_PEAK_INTERPOLATE_SINC70;
				brent_accuracy = 1e-7;
				interpolation_depth = 1.0;
				break;
			case FCC_ACCURATE:
				brent_depth = NUM_PEAK_INTERPOLATE_SINC700;
				brent_accuracy = 1e-11;
				interpolation_depth = 1.0;
				break;
		}
		duration = my dx * my nx;
		if (minimumPitch < periodsPerWindow / duration) {
		     std::cout<<"To analyse this Sound, minimum pitch must not be less than "<< periodsPerWindow / duration<<" Hz."<<std::endl;
			 std::cout<<"Sound_to_Pitch.cpp: Line 31.103"<<std::endl;
			 return NULL;
		}
		
	   /*
		 * Determine the number of samples in the longest period.
		 * We need this to compute the local mean of the sound (looking one period in both directions),
		 * and to compute the local peak of the sound (looking half a period in both directions).
		 */
		nsamp_period = floor(1 / my dx / minimumPitch);
		halfnsamp_period = nsamp_period / 2 + 1;

		if (ceiling > 0.5 / my dx) ceiling = 0.5 / my dx;
		
	    // Determine window length in seconds and in samples.
		dt_window = periodsPerWindow / minimumPitch;
		nsamp_window = floor (dt_window / my dx);
		halfnsamp_window = nsamp_window / 2 - 1;
		if (halfnsamp_window < 2){
			std::cout<<"Analysis window too short."<<std::endl;
			std::cout<<"Sound_to_Pitch.cpp: Line 31.123"<<std::endl;
	        return NULL;		
		}
		nsamp_window = halfnsamp_window * 2;
		
	    // Determine the minimum and maximum lags.
		minimumLag = floor (1 / my dx / ceiling);
		if (minimumLag < 2) minimumLag = 2;
		maximumLag = floor (nsamp_window / periodsPerWindow) + 2;
		if (maximumLag > nsamp_window) maximumLag = nsamp_window;

		/*
		 * Determine the number of frames.
		 * Fit as many frames as possible symmetrically in the total duration.
		 * We do this even for the forward cross-correlation method,
		 * because that allows us to compare the two methods.
		 */  
	   if(!Sampled_shortTermAnalysis (me, method >= FCC_NORMAL ? 1 / minimumPitch + dt_window : dt_window, dt, & nFrames, & t1)){
           std::cout<<"The pitch analysis would give zero pitch frames."<<std::endl;   
           std::cout<<"Sound_to_Pitch.cpp: Line 31.142"<<std::endl;		   
		   return NULL;
	   }
	   	
	  // Create the resulting pitch contour. 
	    Pitch thee = Pitch_create (my xmin, my xmax, nFrames, dt, t1, ceiling, maxnCandidates);     
       
	   // Compute the global absolute peak for determination of silence threshold.
		globalPeak = 0.0;
		for (long channel = 1; channel <= my ny; channel ++) {
			double mean = 0.0;
			for (long i = 1; i <= my nx; i ++) {
				mean += my z [channel] [i];
			}
			mean /= my nx;
			for (long i = 1; i <= my nx; i ++) {
				double value = fabs (my z [channel] [i] - mean);
				if (value > globalPeak) globalPeak = value;
			}
		}
		if (globalPeak == 0.0)   return thee;
		
	   double **frame = NULL, *ac = NULL, *window = NULL, *windowR = NULL;
	   
	   if (method >= FCC_NORMAL) {   /* For cross-correlation analysis. */			
		   // Create buffer for cross-correlation analysis.
		    frame = (double **)malloc(sizeof(double *) * (my ny + 1));
			for(long i = 1; i <= my ny; ++ i){
			   frame[i] = (double *)malloc(sizeof(double) * (nsamp_window + 1));
			   for(long j = 1; j <= nsamp_window; ++ j)
			      frame[i][j] = 0.0;
		    }   /****frame.reset (1, my ny, 1, nsamp_window);****/
				  
			brent_ixmax = nsamp_window * interpolation_depth;
		} else {   /* For autocorrelation analysis. */		   
		   /*
			* Compute the number of samples needed for doing FFT.
			* To avoid edge effects, we have to append zeroes to the window.
			* The maximum lag considered for maxima is maximumLag.
			* The maximum lag used in interpolation is nsamp_window * interpolation_depth.
			*/
			nsampFFT = 1; 
			while (nsampFFT < nsamp_window * (1 + interpolation_depth))  nsampFFT *= 2;
			
			// Create buffers for autocorrelation analysis.
		    frame = (double **)malloc(sizeof(double *) * (my ny + 1));
			for(long i = 1; i <= my ny; ++ i){
			   frame [i] = (double *)malloc(sizeof(double) * (nsampFFT + 1));
			   for(long j = 0; j <= nsampFFT; ++ j)
			      frame[i][j] = 0.0;
		    }  /****frame.reset (1, my ny, 1, nsampFFT);****/
			
			window = (double *)malloc(sizeof(double) * (nsamp_window + 1));
			for(long i = 0; i <= nsamp_window; ++ i)
			     window[i] = 0.0;
			/****window.reset (1, nsamp_window);****/		
			
			windowR = (double *)malloc(sizeof(double) * (nsampFFT + 1));
			ac = (double *)malloc(sizeof(double) * (nsampFFT + 1));
			for(long i = 0; i <= nsampFFT; ++ i)
			     windowR[i] = ac[i] = 0.0;
		     /****windowR.reset (1, nsampFFT); ac.reset (1, nsampFFT); ****/
			
			NUMfft_Table_init (fftTable, nsampFFT);
			
			/*
			* A Gaussian or Hanning window is applied against phase effects.
			* The Hanning window is 2 to 5 dB better for 3 periods/window.
			* The Gaussian window is 25 to 29 dB better for 6 periods/window.
			*/
			if (method == AC_GAUSS) { /* Gaussian window. */
				double imid = 0.5 * (nsamp_window + 1), edge = exp (-12.0);
				for (long i = 1; i <= nsamp_window; i ++)
					window[i] = (exp(-48.0*(i-imid)*(i-imid) /
						(nsamp_window + 1) / (nsamp_window + 1)) - edge) / (1 - edge);
			} else {  /* Hanning window*/
				for (long i = 1; i <= nsamp_window; i ++) 
					window [i] = 0.5 - 0.5 * cos (i * 2 * NUMpi / (nsamp_window + 1));
			}
			    
			// Compute the normalized autocorrelation of the window.
			for (long i = 1; i <= nsamp_window; i ++) windowR [i] = window [i];
			NUMfft_forward (fftTable, windowR);
			windowR [1] *= windowR [1];   // DC component
			for (long i = 2; i < nsampFFT; i += 2) {
				windowR [i] = windowR [i] * windowR [i] + windowR [i+1] * windowR [i+1];
				windowR [i + 1] = 0.0;   // power spectrum: square and zero
			}
			windowR [nsampFFT] *= windowR [nsampFFT];   // Nyquist frequency
			NUMfft_backward (fftTable, windowR);   // autocorrelation
			for (long i = 2; i <= nsamp_window; i ++) windowR [i] /= windowR [1];   // normalize
			windowR [1] = 1.0;   // normalize

			brent_ixmax = nsamp_window * interpolation_depth;
		}
		
	   double *r = (double *) malloc( sizeof(double) * (2 * (nsamp_window + 1) + 1) );
	   r += nsamp_window + 1;   // shift so that "r" can be indexed symmetrically around zero lag
	   long *imax = (long *) malloc( sizeof(long) * (maxnCandidates + 1));
	   double *localMean = (double *) malloc( sizeof(double) * (my ny + 1));
	   
	   for(iframe = 1; iframe <= nFrames; iframe ++){
	        Pitch_Frame pitchFrame = & thy frame[iframe];
			double t = thy x1 + (iframe - 1) *(thy dx), localPeak;
			long leftSample = (long) floor((t - my x1) / my dx) + 1;
			long rightSample = leftSample + 1;
			long startSample, endSample;
			
		   for(long channel = 1; channel <= my ny; ++ channel){   //Compute the local mean; look one longest period to both sides.
			    startSample = rightSample - nsamp_period;
				endSample = leftSample + nsamp_period;
				if (startSample < 1) {
				    std::cout<<"StartSample < 1"<<std::endl;
					std::cout<<"Sound_to_Pitch.cpp: Line 31"<<std::endl;
					return NULL;
				}
				
				if (endSample > my nx){
				    std::cout<<"EndSample > my nx"<<std::endl;
					std::cout<<"Sound_to_Pitch.cpp: Line 31.262"<<std::endl;
					return NULL;
				}
				
				localMean[channel] = 0.0;
				for (long i = startSample; i <= endSample; i ++) {    
					localMean[channel] += my z[channel][i];
				}
				localMean[channel] /= 2 * nsamp_period;
		
				// Copy a window to a frame and subtract the local mean. We are going to kill the DC component before windowing.	 
				startSample = rightSample - halfnsamp_window;
				endSample = leftSample + halfnsamp_window;
				
				if ( startSample < 1 ) {
				    std::cout<<"StartSample < 1"<<std::endl;
					std::cout<<"Sound_to_Pitch.cpp: Line 31.281"<<std::endl;
					return NULL;
				}
				
				if (endSample > my nx){
				    std::cout<<"EndSample > my nx"<<std::endl;
					std::cout<<"Sound_to_Pitch.cpp: Line 31.287"<<std::endl;
					return NULL;
				}
			
	           if (method < FCC_NORMAL) {
					for (long j = 1, i = startSample; j <= nsamp_window; j ++)
						frame [channel] [j] = (my z [channel] [i ++] - localMean [channel]) * window [j];
					for (long j = nsamp_window + 1; j <= nsampFFT; j ++)
						frame [channel] [j] = 0.0;
				} else {
					for (long j = 1, i = startSample; j <= nsamp_window; j ++)
						frame [channel] [j] = my z [channel] [i ++] - localMean [channel];
				}
			}
          
		// Compute the local peak; look half a longest period to both sides.
            localPeak = 0.0;
			if ((startSample = halfnsamp_window + 1 - halfnsamp_period) < 1) startSample = 1;
			if ((endSample = halfnsamp_window + halfnsamp_period) > nsamp_window) endSample = nsamp_window;
			for (long channel = 1; channel <= my ny; channel ++) {
				for (long j = startSample; j <= endSample; j ++) {
					double value = fabs (frame [channel] [j]);
					if (value > localPeak) localPeak = value;
				}
			}
			pitchFrame->intensity = localPeak > globalPeak ? 1.0 : localPeak / globalPeak;  		
		
			// Compute the correlation into the array 'r'.		
		if (method >= FCC_NORMAL) {
			double startTime = t - 0.5 * (1.0 / minimumPitch + dt_window);
			long localSpan = maximumLag + nsamp_window, localMaximumLag, offset;
			if ((startSample = (long) floor ((startTime - my x1) / my dx)) + 1 < 1)
				 startSample = 1;
			if (localSpan > my nx + 1 - startSample) localSpan = my nx + 1 - startSample;
			localMaximumLag = localSpan - nsamp_window;
			offset = startSample - 1;
			double sumx2 = 0;                          /* Sum of squares. */
			for (long channel = 1; channel <= my ny; channel ++) {
				double *amp = my z[channel] + offset;
				for (long i = 1; i <= nsamp_window; i ++) {
					double x = amp[i] - localMean[channel]; 
					sumx2 += x * x;
				}
			}
			double sumy2 = sumx2;                      /* At zero lag, these are still equal. */
			r[0] = 1.0;
			for (long i = 1; i <= localMaximumLag; i ++) {
				double product = 0.0;
				for (long channel = 1; channel <= my ny; channel ++) {
					double *amp = my z[channel] + offset;
					double y0 = amp[i] - localMean[channel];
					double yZ = amp[i + nsamp_window] - localMean[channel];
					sumy2 += yZ * yZ - y0 * y0;
					for (long j = 1; j <= nsamp_window; j ++) {
						double x = amp[j] - localMean[channel];
						double y = amp[i + j] - localMean[channel];
						product += x * y;
					}
				}
				r[- i] = r[i] = product / sqrt (sumx2 * sumy2);
			}
		} else {			
			// The FFT of the autocorrelation is the power spectrum.		
	            for (long i = 1; i <= nsampFFT; i ++) 
					ac [i] = 0.0;
				for (long channel = 1; channel <= my ny; channel ++) {
					NUMfft_forward (fftTable, frame [channel]);   /* Complex spectrum. */
					ac [1] += frame [channel] [1] * frame [channel] [1];   /* DC component. */
					for (long i = 2; i < nsampFFT; i += 2) {
						ac [i] += frame [channel] [i] * frame [channel] [i] + frame [channel] [i+1] * frame [channel] [i+1]; /* Power spectrum. */
					}
					ac [nsampFFT] += frame [channel] [nsampFFT] * frame [channel] [nsampFFT];   /* Nyquist frequency. */
				}
				NUMfft_backward (fftTable, ac);   /* Autocorrelation. */

				/*
				 * Normalize the autocorrelation to the value with zero lag,
				 * and divide it by the normalized autocorrelation of the window.
				 */
				r [0] = 1.0;
				for (long i = 1; i <= brent_ixmax; i ++)
					r [- i] = r [i] = ac [i + 1] / (ac [1] * windowR [i + 1]);
		}
			
		// Create (too much) space for candidates
		Pitch_Frame_init (pitchFrame, maxnCandidates);

	    // Register the first candidate, which is always present: voicelessness.
		pitchFrame->nCandidates = 1;
		pitchFrame->candidate[1].frequency = 0.0;    /* Voiceless: always present. */
		pitchFrame->candidate[1].strength = 0.0;

		/*
		 * Shortcut: absolute silence is always voiceless.
		 * Go to next frame.
		 */
		if (localPeak == 0) continue;

		/*
		 * Find the strongest maxima of the correlation of this frame, 
		 * and register them as candidates.
		 */
		imax[1] = 0;
		for (long i = 2; i < maximumLag && i < brent_ixmax; i ++)
		    if (r[i] > 0.5 * voicingThreshold &&       /* Not too unvoiced? */
				r[i] > r[i-1] && r[i] >= r[i+1])       /* Maximum ? */
		{
			int place = 0;
			// Use parabolic interpolation for a first estimate of the frequency, and sin(x)/x interpolation to compute the strength of this frequency.
			double dr = 0.5 * (r[i+1] - r[i-1]);
			double d2r = 2 * r[i] - r[i-1] - r[i+1];
			double frequencyOfMaximum = 1 / my dx / (i + dr / d2r);
			long offset = - brent_ixmax - 1;
			double strengthOfMaximum = /* method & 1 ? */
				NUM_interpolate_sinc (& r[offset], brent_ixmax - offset, 1 / my dx / frequencyOfMaximum - offset, 30)
				/* : r [i] + 0.5 * dr * dr / d2r */;
			   /* High values due to short windows are to be reflected around 1. */
			if (strengthOfMaximum > 1.0) strengthOfMaximum = 1.0 / strengthOfMaximum;

			// Find a place for this maximum.
			if (pitchFrame->nCandidates < thy maxnCandidates) { /* Is there still a free place? */
				place = ++ pitchFrame->nCandidates;
			} else {
			   /* Try the place of the weakest candidate so far. */
				double weakest = 2;
				for (int iweak = 2; iweak <= thy maxnCandidates; iweak ++) {
					/* High frequencies are to be favoured */
					/* if we want to analyze a perfectly periodic signal correctly. */
					double localStrength = pitchFrame->candidate[iweak].strength - octaveCost *
						NUMlog2 (minimumPitch / pitchFrame->candidate[iweak].frequency);
					if (localStrength < weakest) { 
					     weakest = localStrength; 
						 place = iweak; 
				      }
				}
				/* If this maximum is weaker than the weakest candidate so far, give it no place. */
				if (strengthOfMaximum - octaveCost * NUMlog2 (minimumPitch / frequencyOfMaximum) <= weakest)
					place = 0;
			}
			if (place) {              /* Have we found a place for this candidate? */
				pitchFrame->candidate[place].frequency = frequencyOfMaximum;
				pitchFrame->candidate[place].strength = strengthOfMaximum;
				imax [place] = i;
			}
		}
		
		// Second pass: for extra precision, maximize sin(x)/x interpolation ('sinc').
		for (long i = 2; i <= pitchFrame->nCandidates; i ++) { 
			if (method != AC_HANNING || pitchFrame->candidate[i].frequency > 0.0 / my dx) {
				double xmid, ymid;
				long offset = - brent_ixmax - 1;
				ymid = NUMimproveMaximum (& r[offset], brent_ixmax - offset, imax[i] - offset,
					pitchFrame->candidate[i].frequency > 0.3 / my dx ? NUM_PEAK_INTERPOLATE_SINC700 : brent_depth, & xmid);
				xmid += offset;
				pitchFrame->candidate[i].frequency = 1.0 / my dx / xmid;
				if (ymid > 1.0) ymid = 1.0 / ymid;
				pitchFrame->candidate[i].strength = ymid;
			}
		}
	}   /* Next frame. */
	
	Pitch_pathFinder (thee, silenceThreshold, voicingThreshold, octaveCost, octaveJumpCost,
		voicedUnvoicedCost, ceiling, false);
		// last argument: false; originally Melder_debug == 31 ? true : false (Melder_debug 31: pitch analysis with formant pulling on)
	return thee; 
}
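// A minimal standalone version of the parabolic-interpolation step above: given the
// correlation values around a local maximum at lag i (r [i-1], r [i], r [i+1]), estimate
// the fractional peak lag and the corresponding frequency; dx is the sampling period.
// 'parabolicPeakFrequency' is an illustrative name.
static double parabolicPeakFrequency (double rm1, double r0, double rp1, long i, double dx) {
	double dr = 0.5 * (rp1 - rm1);
	double d2r = 2.0 * r0 - rm1 - rp1;   // positive at a local maximum
	return 1.0 / dx / ((double) i + dr / d2r);   // peak lag in samples -> frequency in Hz
}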
Example #10
autoSpectrum Sound_to_Spectrum (Sound me, int fast) {
	try {
		long numberOfSamples = my nx;
		const long numberOfChannels = my ny;
		if (fast) {
			numberOfSamples = 2;
			while (numberOfSamples < my nx) numberOfSamples *= 2;
		}
		long numberOfFrequencies = numberOfSamples / 2 + 1;   // 4 samples -> cos0 cos1 sin1 cos2; 5 samples -> cos0 cos1 sin1 cos2 sin2

		autoNUMvector <double> data (1, numberOfSamples);
		if (numberOfChannels == 1) {
			const double *channel = my z [1];
			for (long i = 1; i <= my nx; i ++) {
				data [i] = channel [i];
			}
			/*
				All samples from `my nx + 1` through `numberOfSamples`
				should be set to zero, but they are already zero.
			*/
			// so do nothing
		} else {
			for (long ichan = 1; ichan <= numberOfChannels; ichan ++) {
				const double *channel = my z [ichan];
				for (long i = 1; i <= my nx; i ++) {
					data [i] += channel [i];
				}
			}
			for (long i = 1; i <= my nx; i ++) {
				data [i] /= numberOfChannels;
			}
		}

		autoNUMfft_Table fourierTable;
		NUMfft_Table_init (& fourierTable, numberOfSamples);
		NUMfft_forward (& fourierTable, data.peek());

		autoSpectrum thee = Spectrum_create (0.5 / my dx, numberOfFrequencies);
		thy dx = 1.0 / (my dx * numberOfSamples);   // override
		double *re = thy z [1];
		double *im = thy z [2];
		double scaling = my dx;
		re [1] = data [1] * scaling;
		im [1] = 0.0;
		for (long i = 2; i < numberOfFrequencies; i ++) {
			re [i] = data [i + i - 2] * scaling;   // data [2], data [4], ...
			im [i] = data [i + i - 1] * scaling;   // data [3], data [5], ...
		}
		if ((numberOfSamples & 1) != 0) {
			if (numberOfSamples > 1) {
				re [numberOfFrequencies] = data [numberOfSamples - 1] * scaling;
				im [numberOfFrequencies] = data [numberOfSamples] * scaling;
			}
		} else {
			re [numberOfFrequencies] = data [numberOfSamples] * scaling;
			im [numberOfFrequencies] = 0.0;
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": not converted to Spectrum.");
	}
}
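// Equivalent of the 'fast' padding at the top of Sound_to_Spectrum above: the smallest
// power of two (at least 2) that is not smaller than nx; zero-padding to this length only
// interpolates the spectrum, it adds no information. Illustrative helper, not a Praat call.
static long nextPowerOfTwo (long nx) {
	long n = 2;
	while (n < nx) n *= 2;
	return n;
}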