Пример #1
0
/*
 * Computes entropy-based association measures (base-2 logarithms) for a contingency table.
 *
 * Cell values are divided by the grand total to obtain probabilities.
 * In this function "x" is the distribution over columns and "y" the distribution over rows.
 *
 * Outputs; every pointer is optional and may be null if the caller does not need that value:
 *   h      entropy of the joint (cell) distribution
 *   hx     entropy of the column totals
 *   hy     entropy of the row totals
 *   hygx   h - hx
 *   hxgy   h - hy
 *   uygx   (hy - hygx) / (hy + TINY)
 *   uxgy   (hx - hxgy) / (hx + TINY)
 *   uxy    2 (hx + hy - h) / (hx + hy + TINY)
 *
 * Cells and totals that are not strictly positive are skipped, so they contribute nothing
 * to the entropies (and no logarithm of zero is taken).
 * Outputs are written only after all values have been computed.
 */
void ContingencyTable_entropies (ContingencyTable me, double *h, double *hx, double *hy, double *hygx, double *hxgy, double *uygx, double *uxgy, double *uxy) {
	// NOTE(review): the totals below are accumulated with += without explicit initialisation,
	// so this relies on autoNUMvector delivering zeroed storage -- confirm.
	autoNUMvector<double> rowsum (1, my numberOfRows);
	autoNUMvector<double> colsum (1, my numberOfColumns);

	// Row and column totals; the grand total is accumulated one finished row at a time.

	double sum = 0.0;
	for (long i = 1; i <= my numberOfRows; i++) {
		for (long j = 1; j <= my numberOfColumns; j++) {
			rowsum[i] += my data[i][j];
			colsum[j] += my data[i][j];
		}
		sum += rowsum[i];
	}

	// Entropy of x distribution (columns)

	double entropyX = 0.0;
	for (long j = 1; j <= my numberOfColumns; j++) {
		if (colsum[j] > 0.0) {
			double p = colsum[j] / sum;
			entropyX -= p * NUMlog2 (p);
		}
	}

	// Entropy of y distribution (rows)

	double entropyY = 0.0;
	for (long i = 1; i <= my numberOfRows; i++) {
		if (rowsum[i] > 0.0) {
			double p = rowsum[i] / sum;
			entropyY -= p * NUMlog2 (p);
		}
	}

	// Total entropy (joint distribution over all cells)

	double entropyJoint = 0.0;
	for (long i = 1; i <= my numberOfRows; i++) {
		for (long j = 1; j <= my numberOfColumns; j++) {
			if (my data[i][j] > 0.0) {
				double p = my data[i][j] / sum;
				entropyJoint -= p * NUMlog2 (p);
			}
		}
	}

	// Conditional entropies and the ratios derived from them.
	// TINY is added to each denominator; presumably a small positive constant that
	// avoids 0/0 when an entropy is zero -- defined elsewhere, confirm.

	double entropyYgivenX = entropyJoint - entropyX;
	double entropyXgivenY = entropyJoint - entropyY;
	double dependencyYonX = (entropyY - entropyYgivenX) / (entropyY + TINY);
	double dependencyXonY = (entropyX - entropyXgivenY) / (entropyX + TINY);
	double dependencySymmetric = 2.0 * (entropyX + entropyY - entropyJoint) / (entropyX + entropyY + TINY);

	// Hand back only what the caller asked for.

	if (h) *h = entropyJoint;
	if (hx) *hx = entropyX;
	if (hy) *hy = entropyY;
	if (hygx) *hygx = entropyYgivenX;
	if (hxgy) *hxgy = entropyXgivenY;
	if (uygx) *uygx = dependencyYonX;
	if (uxgy) *uxgy = dependencyXonY;
	if (uxy) *uxy = dependencySymmetric;
}
Пример #2
0
/*
 * Converts `value`, given in the unit selected by `unit`, to the standard unit.
 *   unit == 1: `value` is treated as an energy and mapped to 10 * log10 (value);
 *   unit == 2: `value` is treated as sones and mapped to 10 * log2 (value);
 *   any other unit: `value` is returned unchanged (it is already in dB).
 * `ilevel` is not used.
 */
double structIntensity :: v_convertSpecialToStandardUnit (double value, long ilevel, int unit) {
	(void) ilevel;
	switch (unit) {
		case 1:
			return 10.0 * log10 (value);   // value = energy
		case 2:
			return 10.0 * NUMlog2 (value);   // value = sones
		default:
			return value;   // value = dB
	}
}
Пример #3
0
/*
 * Converts `value`, given in the unit selected by `unit`, to the standard unit.
 *   unit == 1: energy, mapped to 10 * log10 (value);
 *   unit == 2: sones, mapped to 10 * log2 (value);
 *   otherwise: already dB, returned unchanged.
 * Neither the object argument nor `ilevel` is used.
 */
static double convertSpecialToStandardUnit (I, double value, long ilevel, int unit) {
	(void) void_me;
	(void) ilevel;
	if (unit == 1) {
		return 10.0 * log10 (value);   /* value = energy */
	}
	if (unit == 2) {
		return 10.0 * NUMlog2 (value);   /* value = sones */
	}
	return value;   /* value = dB */
}
Пример #4
0
static double getTransitionCost (long iframe, long icand1, long icand2, int itrack, void *closure) {
	struct fparm *me = (struct fparm *) closure;
	Formant_Frame prevFrame = & my my d_frames [iframe - 1], curFrame = & my my d_frames [iframe];
	double f1, f2;
	(void) itrack;
	if (icand1 > prevFrame -> nFormants || icand2 > curFrame -> nFormants) return 1e30;
	f1 = prevFrame -> formant [icand1]. frequency;
	f2 = curFrame -> formant [icand2]. frequency;
	/*Melder_assert (f1 > 0.0);*/
	/*Melder_assert (f2 > 0.0);*/
	return my octaveJumpCost * fabs (NUMlog2 (f1 / f2));
}
Пример #5
0
/*
 * Returns the entropy, in bits, of the distribution of category strings in `me`.
 *
 * Works on a sorted copy, so `me` itself is not reordered. After sorting, equal strings
 * are adjacent; each run of equal strings of length n adds -p * log2 (p) with p = n / size.
 * An empty collection yields 0.
 */
double Categories_getEntropy (Categories me) {
	autoCategories thee = Data_copy (me);
	Categories_sort (thee.get());

	double entropy = 0.0;
	// Adds the contribution of one run of `runLength` identical strings.
	auto addRun = [&] (long runLength) {
		double p = (double) runLength / thy size;
		entropy -= p * NUMlog2 (p);
	};

	long currentRunLength = 0;
	char32 *lastSeen = nullptr;
	for (long i = 1; i <= thy size; i ++) {
		SimpleString item = thy at [i];
		char32 *current = item -> string;
		const bool startsNewRun = lastSeen && ! str32equ (current, lastSeen);
		if (startsNewRun) {
			addRun (currentRunLength);   // close the run that just ended
			currentRunLength = 1;
		} else {
			currentRunLength ++;
		}
		lastSeen = current;
	}
	// The final run is still open when the loop ends.
	if (currentRunLength) {
		addRun (currentRunLength);
	}
	return entropy;
}
Пример #6
0
/*
 * Derives a Pitch object from a SPINET by weighted harmonic summation.
 *
 * For every time frame (column) of `me`:
 *   - the frame's summed power, relative to the largest summed power of any frame, becomes the intensity;
 *   - the column is spline-interpolated onto a grid that is equidistant in log2 (frequency),
 *     with 48 points per octave between the centre frequencies of the first and last rows;
 *   - for each harmonic number m the positive part of the interpolated spectrum is shifted down
 *     by log2 (m) octaves, weighted by 1 - harmonicFallOffSlope * log2 (m), and summed;
 *   - each local maximum of the summed spectrum is refined by parabolic interpolation
 *     and added as a pitch candidate, after an initial unvoiced candidate.
 * Finally the strengths of all frames are rescaled relative to the largest strength found anywhere.
 *
 * Throws if the frequency grid would have fewer than two points, if `ceiling` does not exceed
 * the lowest filter's centre frequency, or if the SPINET contains no power at all.
 */
autoPitch SPINET_to_Pitch (SPINET me, double harmonicFallOffSlope, double ceiling, int maxnCandidates) {
	try {
		const long pointsPerOctave = 48;
		const double unvoicedCriterium = 0.45;

		// Frequency range covered by the filters, and the log2 grid laid over it.

		const double lowestFrequency = NUMerbToHertz (Sampled2_rowToY (me, 1));
		const double highestFrequency = NUMerbToHertz (Sampled2_rowToY (me, my ny));
		const double log2Lowest = NUMlog2 (lowestFrequency);
		const double log2Highest = NUMlog2 (highestFrequency);
		const double gridPoints = (log2Highest - log2Lowest) * pointsPerOctave;
		const double log2Step = (log2Highest - log2Lowest) / (gridPoints - 1);
		const long nFrequencyPoints = (long) floor (gridPoints);
		const long maxHarmonic = (long) floor (highestFrequency / lowestFrequency);

		if (nFrequencyPoints < 2) {
			Melder_throw (U"Frequency range too small.");
		}
		if (ceiling <= lowestFrequency) {
			Melder_throw (U"Ceiling is smaller than centre frequency of lowest filter.");
		}

		autoPitch thee = Pitch_create (my xmin, my xmax, my nx, my dx, my x1, ceiling, maxnCandidates);
		autoNUMvector<double> framePower (1, my nx);
		autoNUMvector<double> interpolated (1, nFrequencyPoints);
		autoNUMvector<double> harmonicSum (1, nFrequencyPoints);
		autoNUMvector<double> column (1, my ny);
		autoNUMvector<double> secondDerivatives (1, my ny);
		autoNUMvector<double> log2RowFrequency (1, my ny);

		// Row positions: from ERB to log2 (Hz).

		for (long irow = 1; irow <= my ny; irow++) {
			log2RowFrequency[irow] = NUMlog2 (NUMerbToHertz (my y1 + (irow - 1) * my dy));
		}

		// Summed power per frame, and the largest such sum.

		double maxPower = 0.0;
		for (long iframe = 1; iframe <= my nx; iframe++) {
			double total = 0.0;
			for (long irow = 1; irow <= my ny; irow++) {
				total += my s[irow][iframe];
			}
			if (total > maxPower) {
				maxPower = total;
			}
			framePower[iframe] = total;
		}
		if (maxPower == 0.0) {
			Melder_throw (U"No power");
		}

		double maxStrength = 0.0;
		for (long iframe = 1; iframe <= my nx; iframe++) {
			Pitch_Frame pitchFrame = &thy frame[iframe];
			pitchFrame -> intensity = framePower[iframe] / maxPower;

			// Interpolate this frame's column onto the log2-frequency grid.

			for (long irow = 1; irow <= my ny; irow++) {
				column[irow] = my s[irow][iframe];
			}
			NUMspline (log2RowFrequency.peek(), column.peek(), my ny, 1e30, 1e30, secondDerivatives.peek());
			for (long k = 1; k <= nFrequencyPoints; k++) {
				const double log2Frequency = log2Lowest + (k - 1) * log2Step;
				NUMsplint (log2RowFrequency.peek(), column.peek(), secondDerivatives.peek(), my ny, log2Frequency, & interpolated[k]);
				harmonicSum[k] = 0.0;
			}

			// Formula (8): weighted harmonic summation.
			// Harmonic m lies log2 (m) octaves above the fundamental, which is a shift of gridShift points.

			for (long m = 1; m <= maxHarmonic; m++) {
				const double octavesAbove = NUMlog2 (m);
				const double weight = 1 - harmonicFallOffSlope * octavesAbove;
				const long firstPoint = 1 + (long) floor (pointsPerOctave * octavesAbove);
				const long gridShift = firstPoint - 1;
				for (long k = firstPoint; k <= nFrequencyPoints; k++) {
					if (interpolated[k] > 0.0) {
						harmonicSum[k - gridShift] += interpolated[k] * weight;
					}
				}
			}

			// Into the Pitch object: the unvoiced candidate first, then every local maximum.

			Pitch_Frame_init (pitchFrame, maxnCandidates);
			pitchFrame -> nCandidates = 0; /* !!!!! */
			Pitch_Frame_addPitch (pitchFrame, 0, 0, maxnCandidates); /* unvoiced */

			for (long k = 2; k < nFrequencyPoints; k++) {
				const double left = harmonicSum[k - 1], centre = harmonicSum[k], right = harmonicSum[k + 1];
				if (! (centre > left && centre >= right)) {
					continue;   // not a local maximum
				}
				// Parabola through the three points: vertex position and height.
				const double curvature = left - 2.0 * centre + right;
				const double helper = right - 4.0 * centre;
				const double vertexOffset = log2Step * (left - right) / (2 * curvature);
				const double frequency = pow (2.0, log2Lowest + (k - 1) * log2Step + vertexOffset);
				const double strength = (2.0 * left * (4.0 * centre + right) - left * left - helper * helper) / (8.0 * curvature);
				if (strength > maxStrength) {
					maxStrength = strength;
				}
				Pitch_Frame_addPitch (pitchFrame, frequency, strength, maxnCandidates);
			}
		}

		// Scale the pitch strengths relative to the global maximum.

		for (long iframe = 1; iframe <= my nx; iframe++) {
			double bestFrequency, bestStrength;
			Pitch_Frame_getPitch (&thy frame[iframe], &bestFrequency, &bestStrength);
			Pitch_Frame_resizeStrengths (&thy frame[iframe], bestStrength / maxStrength, unvoicedCriterium);
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no Pitch created.");
	}
}
Пример #7
0
/*
 * Computes entropy-based measures (base-2 logarithms) for a confusion matrix.
 *
 * Cell values are divided by the grand total to obtain probabilities; "x" refers to
 * the column totals and "y" to the row totals. Non-positive cells and totals are skipped.
 *
 * Every output pointer is optional: a null pointer means the caller does not want that value.
 *   p_h     entropy of the joint distribution
 *   p_hx    entropy of the column totals
 *   p_hy    entropy of the row totals
 *   p_hygx  h - hx
 *   p_hxgy  h - hy
 *   p_uygx  (hy - hygx) / (hy + TINY)
 *   p_uxgy  (hx - hxgy) / (hx + TINY)
 *   p_uxy   2 (hx + hy - h) / (hx + hy + TINY)
 */
void Confusion_getEntropies (Confusion me, double *p_h, double *p_hx, double *p_hy, double *p_hygx, double *p_hxgy, double *p_uygx, double *p_uxgy, double *p_uxy) {
	autoNUMvector<double> rowSum (1, my numberOfRows);
	autoNUMvector<double> colSum (1, my numberOfColumns);

	// Marginal totals and grand total, accumulated cell by cell.
	double sum = 0.0;
	for (long irow = 1; irow <= my numberOfRows; irow++) {
		for (long icol = 1; icol <= my numberOfColumns; icol++) {
			const double cell = my data[irow][icol];
			rowSum[irow] += cell;
			colSum[icol] += cell;
			sum += cell;
		}
	}

	// Entropy of the row totals.
	double hy = 0.0;
	for (long irow = 1; irow <= my numberOfRows; irow++) {
		if (rowSum[irow] > 0.0) {
			const double p = rowSum[irow] / sum;
			hy -= p * NUMlog2 (p);
		}
	}

	// Entropy of the column totals.
	double hx = 0.0;
	for (long icol = 1; icol <= my numberOfColumns; icol++) {
		if (colSum[icol] > 0.0) {
			const double p = colSum[icol] / sum;
			hx -= p * NUMlog2 (p);
		}
	}

	// Entropy of the joint distribution.
	double h = 0.0;
	for (long irow = 1; irow <= my numberOfRows; irow++) {
		for (long icol = 1; icol <= my numberOfColumns; icol++) {
			if (my data[irow][icol] > 0.0) {
				const double p = my data[irow][icol] / sum;
				h -= p * NUMlog2 (p);
			}
		}
	}

	// Derived quantities.
	const double hygx = h - hx;
	const double hxgy = h - hy;
	const double uygx = (hy - hygx) / (hy + TINY);
	const double uxgy = (hx - hxgy) / (hx + TINY);
	const double uxy = 2.0 * (hx + hy - h) / (hx + hy + TINY);

	// Store only the values the caller asked for.
	auto storeIfWanted = [] (double *destination, double value) {
		if (destination) {
			*destination = value;
		}
	};
	storeIfWanted (p_h, h);
	storeIfWanted (p_hx, hx);
	storeIfWanted (p_hy, hy);
	storeIfWanted (p_hygx, hygx);
	storeIfWanted (p_hxgy, hxgy);
	storeIfWanted (p_uygx, uygx);
	storeIfWanted (p_uxgy, uxgy);
	storeIfWanted (p_uxy, uxy);
}
Пример #8
0
/*
 * Analyses the part of the sound `me` around time `t` and fills `pitchFrame` with
 * an intensity and a list of pitch candidates (frequency + strength).
 *
 * Steps, in the order they are performed:
 *  1. For each channel, compute the local mean over one longest period (nsamp_period samples)
 *     to each side, and copy the mean-subtracted analysis window into frame [channel].
 *     If method < FCC_NORMAL the samples are also multiplied by `window` and the rest of
 *     the frame, up to nsampFFT, is zero-padded.
 *  2. Compute the local absolute peak around the window centre and set
 *     pitchFrame->intensity to localPeak / globalPeak, limited to 1.
 *  3. Compute the normalized correlation into r [-lag .. +lag]:
 *     directly in the time domain if method >= FCC_NORMAL,
 *     otherwise via the power spectrum (FFT), divided by the window's autocorrelation `windowR`.
 *  4. Register the voiceless candidate (frequency 0, strength 0) as candidate 1.
 *  5. Register local maxima of r as candidates, keeping at most maxnCandidates;
 *     when full, a new maximum can replace the weakest existing one.
 *  6. Refine each voiced candidate's position and strength with NUMimproveMaximum.
 *
 * Work buffers supplied by the caller (all overwritten here):
 *   frame      per-channel sample buffers, indexed from 1;
 *   ac         indexed 1..nsampFFT, used only when method < FCC_NORMAL;
 *   window, windowR   read only, used only when method < FCC_NORMAL;
 *   r          written at negative as well as positive indices, so it must point
 *              into the middle of its storage;
 *   imax       lag index of each registered candidate;
 *   localMean  one value per channel.
 */
static void Sound_into_PitchFrame (Sound me, Pitch_Frame pitchFrame, double t,
	double minimumPitch, int maxnCandidates, int method, double voicingThreshold, double octaveCost,
	NUMfft_Table fftTable, double dt_window, long nsamp_window, long halfnsamp_window,
	long maximumLag, long nsampFFT, long nsamp_period, long halfnsamp_period,
	long brent_ixmax, long brent_depth, double globalPeak,
	double **frame, double *ac, double *window, double *windowR,
	double *r, long *imax, double *localMean)
{
	double localPeak;
	long leftSample = Sampled_xToLowIndex (me, t), rightSample = leftSample + 1;
	long startSample, endSample;

	for (long channel = 1; channel <= my ny; channel ++) {
		/*
		 * Compute the local mean; look one longest period to both sides.
		 * The range startSample..endSample contains exactly 2 * nsamp_period samples.
		 */
		startSample = rightSample - nsamp_period;
		endSample = leftSample + nsamp_period;
		Melder_assert (startSample >= 1);
		Melder_assert (endSample <= my nx);
		localMean [channel] = 0.0;
		for (long i = startSample; i <= endSample; i ++) {
			localMean [channel] += my z [channel] [i];
		}
		localMean [channel] /= 2 * nsamp_period;

		/*
		 * Copy a window to a frame and subtract the local mean.
		 * We are going to kill the DC component before windowing.
		 */
		startSample = rightSample - halfnsamp_window;
		endSample = leftSample + halfnsamp_window;
		Melder_assert (startSample >= 1);
		Melder_assert (endSample <= my nx);
		if (method < FCC_NORMAL) {
			/* Windowed copy, followed by zero padding up to the FFT length. */
			for (long j = 1, i = startSample; j <= nsamp_window; j ++)
				frame [channel] [j] = (my z [channel] [i ++] - localMean [channel]) * window [j];
			for (long j = nsamp_window + 1; j <= nsampFFT; j ++)
				frame [channel] [j] = 0.0;
		} else {
			/* Plain mean-subtracted copy; no window shape is applied. */
			for (long j = 1, i = startSample; j <= nsamp_window; j ++)
				frame [channel] [j] = my z [channel] [i ++] - localMean [channel];
		}
	}

	/*
	 * Compute the local peak; look half a longest period to both sides.
	 * The search range is expressed in frame indices and clipped to 1..nsamp_window.
	 */
	localPeak = 0.0;
	if ((startSample = halfnsamp_window + 1 - halfnsamp_period) < 1) startSample = 1;
	if ((endSample = halfnsamp_window + halfnsamp_period) > nsamp_window) endSample = nsamp_window;
	for (long channel = 1; channel <= my ny; channel ++) {
		for (long j = startSample; j <= endSample; j ++) {
			double value = fabs (frame [channel] [j]);
			if (value > localPeak) localPeak = value;
		}
	}
	pitchFrame->intensity = localPeak > globalPeak ? 1.0 : localPeak / globalPeak;

	/*
	 * Compute the correlation into the array 'r'.
	 */
	if (method >= FCC_NORMAL) {
		/*
		 * Time-domain cross-correlation, read straight from the sound samples.
		 * The span is shortened if it would run past the end of the sound.
		 */
		double startTime = t - 0.5 * (1.0 / minimumPitch + dt_window);
		long localSpan = maximumLag + nsamp_window, localMaximumLag, offset;
		if ((startSample = Sampled_xToLowIndex (me, startTime)) < 1) startSample = 1;
		if (localSpan > my nx + 1 - startSample) localSpan = my nx + 1 - startSample;
		localMaximumLag = localSpan - nsamp_window;
		offset = startSample - 1;
		double sumx2 = 0;   // sum of squares
		for (long channel = 1; channel <= my ny; channel ++) {
			double *amp = my z [channel] + offset;
			for (long i = 1; i <= nsamp_window; i ++) {
				double x = amp [i] - localMean [channel];
				sumx2 += x * x;
			}
		}
		double sumy2 = sumx2;   // at zero lag, these are still equal
		r [0] = 1.0;
		for (long i = 1; i <= localMaximumLag; i ++) {
			double product = 0.0;
			for (long channel = 1; channel <= my ny; channel ++) {
				double *amp = my z [channel] + offset;
				/*
				 * Slide the energy of the lagged window by one sample:
				 * add the sample that enters, remove the one that leaves.
				 */
				double y0 = amp [i] - localMean [channel];
				double yZ = amp [i + nsamp_window] - localMean [channel];
				sumy2 += yZ * yZ - y0 * y0;
				for (long j = 1; j <= nsamp_window; j ++) {
					double x = amp [j] - localMean [channel];
					double y = amp [i + j] - localMean [channel];
					product += x * y;
				}
			}
			r [- i] = r [i] = product / sqrt (sumx2 * sumy2);
		}
	} else {

		/*
		 * The FFT of the autocorrelation is the power spectrum.
		 * The power spectra of all channels are summed into 'ac'.
		 */
		for (long i = 1; i <= nsampFFT; i ++) {
			ac [i] = 0.0;
		}
		for (long channel = 1; channel <= my ny; channel ++) {
			NUMfft_forward (fftTable, frame [channel]);   // complex spectrum
			ac [1] += frame [channel] [1] * frame [channel] [1];   // DC component
			for (long i = 2; i < nsampFFT; i += 2) {
				ac [i] += frame [channel] [i] * frame [channel] [i] + frame [channel] [i+1] * frame [channel] [i+1];   // power spectrum
			}
			ac [nsampFFT] += frame [channel] [nsampFFT] * frame [channel] [nsampFFT];   // Nyquist frequency
		}
		NUMfft_backward (fftTable, ac);   /* Autocorrelation. */

		/*
		 * Normalize the autocorrelation to the value with zero lag,
		 * and divide it by the normalized autocorrelation of the window.
		 * Lag i is stored at index i + 1 in 'ac' and 'windowR'.
		 */
		r [0] = 1.0;
		for (long i = 1; i <= brent_ixmax; i ++)
			r [- i] = r [i] = ac [i + 1] / (ac [1] * windowR [i + 1]);
	}

	/*
	 * Register the first candidate, which is always present: voicelessness.
	 */
	pitchFrame->nCandidates = 1;
	pitchFrame->candidate[1].frequency = 0.0;   // voiceless: always present
	pitchFrame->candidate[1].strength = 0.0;

	/*
	 * Shortcut: absolute silence is always voiceless.
	 * We are done for this frame.
	 */
	if (localPeak == 0) return;

	/*
	 * Find the strongest maxima of the correlation of this frame,
	 * and register them as candidates.
	 */
	imax [1] = 0;
	for (long i = 2; i < maximumLag && i < brent_ixmax; i ++)
		if (r [i] > 0.5 * voicingThreshold &&   // not too unvoiced?
			r [i] > r [i-1] && r [i] >= r [i+1])   // maximum?
	{
		int place = 0;

		/*
		 * Use parabolic interpolation for first estimate of frequency,
		 * and sin(x)/x interpolation to compute the strength of this frequency.
		 * The pointer & r [offset] and the shifted position let the interpolation routine
		 * address lags -brent_ixmax .. +brent_ixmax with positive indices.
		 */
		double dr = 0.5 * (r [i+1] - r [i-1]), d2r = 2 * r [i] - r [i-1] - r [i+1];
		double frequencyOfMaximum = 1 / my dx / (i + dr / d2r);
		long offset = - brent_ixmax - 1;
		double strengthOfMaximum = /* method & 1 ? */
			NUM_interpolate_sinc (& r [offset], brent_ixmax - offset, 1 / my dx / frequencyOfMaximum - offset, 30)
			/* : r [i] + 0.5 * dr * dr / d2r */;
		/* High values due to short windows are to be reflected around 1. */
		if (strengthOfMaximum > 1.0) strengthOfMaximum = 1.0 / strengthOfMaximum;

		/*
		 * Find a place for this maximum.
		 */
		if (pitchFrame->nCandidates < maxnCandidates) {   // is there still a free place?
			place = ++ pitchFrame->nCandidates;
		} else {
			/* Try the place of the weakest candidate so far. */
			/* The search starts at 2, so the voiceless candidate in place 1 is never replaced. */
			double weakest = 2;
			for (int iweak = 2; iweak <= maxnCandidates; iweak ++) {
				/* High frequencies are to be favoured */
				/* if we want to analyze a perfectly periodic signal correctly. */
				double localStrength = pitchFrame->candidate[iweak].strength - octaveCost *
					NUMlog2 (minimumPitch / pitchFrame->candidate[iweak].frequency);
				if (localStrength < weakest) { weakest = localStrength; place = iweak; }
			}
			/* If this maximum is weaker than the weakest candidate so far, give it no place. */
			if (strengthOfMaximum - octaveCost * NUMlog2 (minimumPitch / frequencyOfMaximum) <= weakest)
				place = 0;
		}
		if (place) {   // have we found a place for this candidate?
			pitchFrame->candidate[place].frequency = frequencyOfMaximum;
			pitchFrame->candidate[place].strength = strengthOfMaximum;
			imax [place] = i;
		}
	}

	/*
	 * Second pass: for extra precision, maximize sin(x)/x interpolation ('sinc').
	 * NOTE(review): '0.0 / my dx' evaluates to 0 for any finite non-zero dx, so for AC_HANNING
	 * the test below only requires a positive frequency -- confirm that this is intended.
	 */
	for (long i = 2; i <= pitchFrame->nCandidates; i ++) {
		if (method != AC_HANNING || pitchFrame->candidate[i].frequency > 0.0 / my dx) {
			double xmid, ymid;
			long offset = - brent_ixmax - 1;
			/* Candidates above 0.3 times the sampling frequency always get the SINC700 depth. */
			ymid = NUMimproveMaximum (& r [offset], brent_ixmax - offset, imax [i] - offset,
				pitchFrame->candidate[i].frequency > 0.3 / my dx ? NUM_PEAK_INTERPOLATE_SINC700 : brent_depth, & xmid);
			xmid += offset;   // back from the shifted index to a lag in samples
			pitchFrame->candidate[i].frequency = 1.0 / my dx / xmid;
			if (ymid > 1.0) ymid = 1.0 / ymid;   // reflect around 1, as in the first pass
			pitchFrame->candidate[i].strength = ymid;
		}
	}
}
Пример #9
0
/*
 * Chooses, for every frame of `me`, the candidate that lies on the globally best path
 * through all frames, and moves that candidate to position 1 of its frame.
 *
 * The path is found by dynamic programming:
 *   - each candidate gets a local score: for voiced candidates its strength minus
 *     octaveCost * log2 (ceiling / frequency); for voiceless candidates a value derived
 *     from voicingThreshold, silenceThreshold and the frame's intensity;
 *   - each transition between consecutive frames costs voicedUnvoicedCost when voicing changes,
 *     octaveJumpCost * |log2 (f1 / f2)| when both candidates are voiced, and nothing when both are voiceless;
 *   - delta [iframe] [icand] holds the best cumulative score of any path ending in that candidate,
 *     and psi [iframe] [icand] the candidate of the previous frame on that path.
 *
 * Side effects: `my ceiling` is set to `ceiling`, and the candidates within each frame are reordered.
 * octaveJumpCost and voicedUnvoicedCost are scaled by 0.01 / my dx before use.
 * If pullFormants is non-zero, candidates up to 2 * ceiling count as voiced during the search;
 * afterwards winners between ceiling and 2 * ceiling are swapped with a voiceless candidate of the same frame, if there is one.
 *
 * Rethrows any MelderError with ": path not found." appended.
 */
void Pitch_pathFinder (Pitch me, double silenceThreshold, double voicingThreshold,
	double octaveCost, double octaveJumpCost, double voicedUnvoicedCost,
	double ceiling, int pullFormants)
{
	if (Melder_debug == 33)
		Melder_casual (U"Pitch path finder:"
			U"\nSilence threshold = ", silenceThreshold,
			U"\nVoicing threshold = ", voicingThreshold,
			U"\nOctave cost = ", octaveCost,
			U"\nOctave jump cost = ", octaveJumpCost,
			U"\nVoiced/unvoiced cost = ", voicedUnvoicedCost,
			U"\nCeiling = ", ceiling,
			U"\nPull formants = ", pullFormants);
	try {
		long maxnCandidates = Pitch_getMaxnCandidates (me);
		long place;
		/* NOTE(review): the reason for 'volatile' is not visible here; presumably it forces */
		/* both values to be rounded to double before they are compared -- confirm. */
		volatile double maximum, value;
		double ceiling2 = pullFormants ? 2 * ceiling : ceiling;
		/* Next three lines 20011015 */
		/* The two transition costs are scaled by 0.01 / my dx. */
		double timeStepCorrection = 0.01 / my dx;
		octaveJumpCost *= timeStepCorrection;
		voicedUnvoicedCost *= timeStepCorrection;

		my ceiling = ceiling;
		autoNUMmatrix <double> delta (1, my nx, 1, maxnCandidates);
		autoNUMmatrix <long> psi (1, my nx, 1, maxnCandidates);

		/* Local scores: fill delta with the score of each candidate on its own. */

		for (long iframe = 1; iframe <= my nx; iframe ++) {
			Pitch_Frame frame = & my frame [iframe];
			/* The voiceless score is voicingThreshold plus a non-negative bonus that grows as the frame's intensity drops. */
			double unvoicedStrength = silenceThreshold <= 0 ? 0 :
				2 - frame->intensity / (silenceThreshold / (1 + voicingThreshold));
			unvoicedStrength = voicingThreshold + (unvoicedStrength > 0 ? unvoicedStrength : 0);
			for (long icand = 1; icand <= frame->nCandidates; icand ++) {
				Pitch_Candidate candidate = & frame->candidate [icand];
				int voiceless = candidate->frequency == 0 || candidate->frequency > ceiling2;
				delta [iframe] [icand] = voiceless ? unvoicedStrength :
					candidate->strength - octaveCost * NUMlog2 (ceiling / candidate->frequency);
			}
		}

		/* Look for the most probable path through the maxima. */
		/* There is a cost for the voiced/unvoiced transition, */
		/* and a cost for a frequency jump. */

		for (long iframe = 2; iframe <= my nx; iframe ++) {
			Pitch_Frame prevFrame = & my frame [iframe - 1], curFrame = & my frame [iframe];
			double *prevDelta = delta [iframe - 1], *curDelta = delta [iframe];
			long *curPsi = psi [iframe];
			for (long icand2 = 1; icand2 <= curFrame -> nCandidates; icand2 ++) {
				double f2 = curFrame -> candidate [icand2]. frequency;
				maximum = -1e30;
				place = 0;
				for (long icand1 = 1; icand1 <= prevFrame -> nCandidates; icand1 ++) {
					double f1 = prevFrame -> candidate [icand1]. frequency;
					double transitionCost;
					bool previousVoiceless = f1 <= 0 || f1 >= ceiling2;
					bool currentVoiceless = f2 <= 0 || f2 >= ceiling2;
					if (currentVoiceless) {
						if (previousVoiceless) {
							transitionCost = 0;   // both voiceless
						} else {
							transitionCost = voicedUnvoicedCost;   // voiced-to-unvoiced transition
						}
					} else {
						if (previousVoiceless) {
							transitionCost = voicedUnvoicedCost;   // unvoiced-to-voiced transition
							if (Melder_debug == 30) {
								/*
								 * Try to take into account a frequency jump across a voiceless stretch.
								 * Follow the stored best path backwards until a voiced frame is found;
								 * the jump cost is divided by the number of frames bridged.
								 */
								long place1 = icand1;
								for (long jframe = iframe - 2; jframe >= 1; jframe --) {
									place1 = psi [jframe + 1] [place1];
									f1 = my frame [jframe]. candidate [place1]. frequency;
									if (f1 > 0 && f1 < ceiling) {
										transitionCost += octaveJumpCost * fabs (NUMlog2 (f1 / f2)) / (iframe - jframe);
										break;
									}
								}
							}
						} else {
							transitionCost = octaveJumpCost * fabs (NUMlog2 (f1 / f2));   // both voiced
						}
					}
					value = prevDelta [icand1] - transitionCost + curDelta [icand2];
					//if (Melder_debug == 33) Melder_casual ("Frame %ld, current candidate %ld (delta %g), previous candidate %ld (delta %g), "
					//	"transition cost %g, value %g, maximum %g", iframe, icand2, curDelta [icand2], icand1, prevDelta [icand1], transitionCost, value, maximum);
					if (value > maximum) {
						maximum = value;
						place = icand1;
					} else if (value == maximum) {
						/* A tie keeps the earlier candidate; it is only reported in debug mode. */
						if (Melder_debug == 33)
							Melder_casual (
								U"A tie in frame ", iframe,
								U", current candidate ", icand2,
								U", previous candidate ", icand1
							);
					}
				}
				/* From here on curDelta holds the cumulative score instead of the local one. */
				curDelta [icand2] = maximum;
				curPsi [icand2] = place;
			}
		}

		/* Find the end of the most probable path. */

		place = 1;
		maximum = delta [my nx] [place];
		for (long icand = 2; icand <= my frame [my nx]. nCandidates; icand ++) {
			if (delta [my nx] [icand] > maximum) {
				place = icand;
				maximum = delta [my nx] [place];
			}
		}

		/* Backtracking: follow the path backwards. */
		/* In each frame the candidate on the path is swapped into position 1. */

		for (long iframe = my nx; iframe >= 1; iframe --) {
			if (Melder_debug == 33)
				Melder_casual (
					U"Frame ", iframe, U":",
					U" swapping candidates 1 and ", place
				);
			Pitch_Frame frame = & my frame [iframe];
			structPitch_Candidate help = frame -> candidate [1];
			frame -> candidate [1] = frame -> candidate [place];
			frame -> candidate [place] = help;
			place = psi [iframe] [place];   // This assignment is challenging to CodeWarrior 11.
		}

		/* Pull formants: devoice frames with frequencies between ceiling and ceiling2. */
		/* The winner is swapped with the first voiceless candidate found in the same frame. */

		if (ceiling2 > ceiling) {
			if (Melder_debug == 33)
				Melder_casual (U"Pulling formants...");
			for (long iframe = my nx; iframe >= 1; iframe --) {
				Pitch_Frame frame = & my frame [iframe];
				Pitch_Candidate winner = & frame -> candidate [1];
				double f = winner -> frequency;
				if (f > ceiling && f <= ceiling2) {
					for (long icand = 2; icand <= frame -> nCandidates; icand ++) {
						Pitch_Candidate loser = & frame -> candidate [icand];
						if (loser -> frequency == 0.0) {
							structPitch_Candidate help = * winner;
							* winner = * loser;
							* loser = help;
							break;
						}
					}
				}
			}
		}
	} catch (MelderError) {
		Melder_throw (me, U": path not found.");
	}
}
Pitch Sound_to_Pitch_any (Sound me, double dt,     /*timeStepStradygy related*/
                         double minimumPitch,      /*Pitch settings realted*/
						 double periodsPerWindow,  /*kTimeSoundAnalysisEditor_pitch_analysisMethod  related*/
						 int maxnCandidates, 
						 int method,               /*method related*/
                         double silenceThreshold, double voicingThreshold, double octaveCost, double octaveJumpCost, 
						 double voicedUnvoicedCost, double ceiling)
{
	  NUMfft_Table fftTable = NUMfft_Table_create();
	  double duration, t1;
	  double dt_window;                       /* Window length in seconds. */
	  long nsamp_window, halfnsamp_window;   /* Number of samples per window. */
	  long nFrames, minimumLag, maximumLag;
	  long iframe, nsampFFT;
	  double interpolation_depth;
	  long nsamp_period, halfnsamp_period;   /* Number of samples in longest period. */
	  long brent_ixmax, brent_depth;
	  double brent_accuracy;                 /* Obsolete. */
	  double globalPeak;
	  
	   if (maxnCandidates < 2 || method < AC_HANNING && method > FCC_ACCURATE)
	   {
	       std::cout<<"Error: maxnCandidates: "<<maxnCandidates<<" method: "<<method<<"."<<std::endl;
		   std::cout<<"Sound_to_Pitch.cpp: Line 13. 69"<<std::endl;
		   return NULL;
	   }
	  
	   if (maxnCandidates < ceiling / minimumPitch) maxnCandidates = ceiling / minimumPitch;
 
	   if (dt <= 0.0) dt = periodsPerWindow / minimumPitch / 4.0;  /* e.g. 3 periods, 75 Hz: 10 milliseconds. */

		switch (method) {
			case AC_HANNING:
				brent_depth = NUM_PEAK_INTERPOLATE_SINC70;
				brent_accuracy = 1e-7;
				interpolation_depth = 0.5;
				break;
			case AC_GAUSS:
				periodsPerWindow *= 2;       /* Because Gaussian window is twice as long. */
				brent_depth = NUM_PEAK_INTERPOLATE_SINC700;
				brent_accuracy = 1e-11;
				interpolation_depth = 0.25;   /* Because Gaussian window is twice as long. */
				break;
			case FCC_NORMAL:
				brent_depth = NUM_PEAK_INTERPOLATE_SINC70;
				brent_accuracy = 1e-7;
				interpolation_depth = 1.0;
				break;
			case FCC_ACCURATE:
				brent_depth = NUM_PEAK_INTERPOLATE_SINC700;
				brent_accuracy = 1e-11;
				interpolation_depth = 1.0;
				break;
		}
		duration = my dx * my nx;
		if (minimumPitch < periodsPerWindow / duration) {
		     std::cout<<"To analyse this Sound, minimum pitch must not be less than "<< periodsPerWindow / duration<<" Hz."<<std::endl;
			 std::cout<<"Sound_to_Pitch.cpp: Line 31.103"<<std::endl;
			 return NULL;
		}
		
	   /*
		 * Determine the number of samples in the longest period.
		 * We need this to compute the local mean of the sound (looking one period in both directions),
		 * and to compute the local peak of the sound (looking half a period in both directions).
		 */
		nsamp_period = floor(1 / my dx / minimumPitch);
		halfnsamp_period = nsamp_period / 2 + 1;

		if (ceiling > 0.5 / my dx) ceiling = 0.5 / my dx;
		
	    // Determine window length in seconds and in samples.
		dt_window = periodsPerWindow / minimumPitch;
		nsamp_window = floor (dt_window / my dx);
		halfnsamp_window = nsamp_window / 2 - 1;
		if (halfnsamp_window < 2){
			std::cout<<"Analysis window too short."<<std::endl;
			std::cout<<"Sound_to_Pitch.cpp: Line 31.123"<<std::endl;
	        return NULL;		
		}
		nsamp_window = halfnsamp_window * 2;
		
	    // Determine the minimum and maximum lags.
		minimumLag = floor (1 / my dx / ceiling);
		if (minimumLag < 2) minimumLag = 2;
		maximumLag = floor (nsamp_window / periodsPerWindow) + 2;
		if (maximumLag > nsamp_window) maximumLag = nsamp_window;

		/*
		 * Determine the number of frames.
		 * Fit as many frames as possible symmetrically in the total duration.
		 * We do this even for the forward cross-correlation method,
		 * because that allows us to compare the two methods.
		 */  
	   if(!Sampled_shortTermAnalysis (me, method >= FCC_NORMAL ? 1 / minimumPitch + dt_window : dt_window, dt, & nFrames, & t1)){
           std::cout<<"The pitch analysis would give zero pitch frames."<<std::endl;   
           std::cout<<"Sound_to_Pitch.cpp: Line 31.142"<<std::endl;		   
		   return NULL;
	   }
	   	
	  // Create the resulting pitch contour. 
	    Pitch thee = Pitch_create (my xmin, my xmax, nFrames, dt, t1, ceiling, maxnCandidates);     
       
	   // Compute the global absolute peak for determination of silence threshold.
		globalPeak = 0.0;
		for (long channel = 1; channel <= my ny; channel ++) {
			double mean = 0.0;
			for (long i = 1; i <= my nx; i ++) {
				mean += my z [channel] [i];
			}
			mean /= my nx;
			for (long i = 1; i <= my nx; i ++) {
				double value = fabs (my z [channel] [i] - mean);
				if (value > globalPeak) globalPeak = value;
			}
		}
		if (globalPeak == 0.0)   return thee;
		
	   double **frame, *ac, *window, *windowR;	
	   
	   if (method >= FCC_NORMAL) {   /* For cross-correlation analysis. */			
		   // Create buffer for cross-correlation analysis.
		    frame = (double **)malloc(sizeof(double *) * (my ny + 1));
			for(long i = 1; i <= my ny; ++ i){
			   frame[i] = (double *)malloc(sizeof(double) * (nsamp_window + 1));
			   for(long j = 1; j <= nsamp_window; ++ j)
			      frame[i][j] = 0.0;
		    }   /****frame.reset (1, my ny, 1, nsamp_window);****/
				  
			brent_ixmax = nsamp_window * interpolation_depth;
		} else {   /* For autocorrelation analysis. */		   
		   /*
			* Compute the number of samples needed for doing FFT.
			* To avoid edge effects, we have to append zeroes to the window.
			* The maximum lag considered for maxima is maximumLag.
			* The maximum lag used in interpolation is nsamp_window * interpolation_depth.
			*/
			nsampFFT = 1; 
			while (nsampFFT < nsamp_window * (1 + interpolation_depth))  nsampFFT *= 2;
			
			// Create buffers for autocorrelation analysis.
		    frame = (double **)malloc(sizeof(double *) * (my ny + 1));
			for(long i = 1; i <= my ny; ++ i){
			   frame [i] = (double *)malloc(sizeof(double) * (nsampFFT + 1));
			   for(long j = 0; j <= nsampFFT; ++ j)
			      frame[i][j] = 0.0;
		    }  /****frame.reset (1, my ny, 1, nsampFFT);****/
			
			window = (double *)malloc(sizeof(double) * (nsamp_window + 1));
			for(long i = 0; i <= nsamp_window; ++ i)
			     window[i] = 0.0;
			/****window.reset (1, nsamp_window);****/		
			
			windowR = (double *)malloc(sizeof(double) * (nsampFFT + 1));
			ac = (double *)malloc(sizeof(double) * (nsampFFT + 1));
			for(long i = 0; i <= nsampFFT; ++ i)
			     windowR[i] = ac[i] = 0.0;
		     /****windowR.reset (1, nsampFFT); ac.reset (1, nsampFFT); ****/
			
			NUMfft_Table_init (fftTable, nsampFFT);
			
			/*
			* A Gaussian or Hanning window is applied against phase effects.
			* The Hanning window is 2 to 5 dB better for 3 periods/window.
			* The Gaussian window is 25 to 29 dB better for 6 periods/window.
			*/
			if (method == AC_GAUSS) { /* Gaussian window. */
				double imid = 0.5 * (nsamp_window + 1), edge = exp (-12.0);
				for (long i = 1; i <= nsamp_window; i ++)
					window[i] = (exp(-48.0*(i-imid)*(i-imid) /
						(nsamp_window + 1) / (nsamp_window + 1)) - edge) / (1 - edge);
			} else {  /* Hanning window*/
				for (long i = 1; i <= nsamp_window; i ++) 
					window [i] = 0.5 - 0.5 * cos (i * 2 * NUMpi / (nsamp_window + 1));
			}
			    
			// Compute the normalized autocorrelation of the window.
			for (long i = 1; i <= nsamp_window; i ++) windowR [i] = window [i];
			NUMfft_forward (fftTable, windowR);
			windowR [1] *= windowR [1];   // DC component
			for (long i = 2; i < nsampFFT; i += 2) {
				windowR [i] = windowR [i] * windowR [i] + windowR [i+1] * windowR [i+1];
				windowR [i + 1] = 0.0;   // power spectrum: square and zero
			}
			windowR [nsampFFT] *= windowR [nsampFFT];   // Nyquist frequency
			NUMfft_backward (fftTable, windowR);   // autocorrelation
			for (long i = 2; i <= nsamp_window; i ++) windowR [i] /= windowR [1];   // normalize
			windowR [1] = 1.0;   // normalize

			brent_ixmax = nsamp_window * interpolation_depth;
		}
		
	   double *r = (double *) malloc( sizeof(double) * (2 * (nsamp_window + 1) + 1) );
	   r += nsamp_window + 1;                                       //make "r" become a symetrical vectr 
	   long *imax = (long *) malloc( sizeof(long) * (maxnCandidates + 1));
	   double *localMean = (double *) malloc( sizeof(double) * (my ny + 1));
	   
	   for(iframe = 1; iframe <= nFrames; iframe ++){
	        Pitch_Frame pitchFrame = & thy frame[iframe];
			double t = thy x1 + (iframe - 1) *(thy dx), localPeak;
			long leftSample = (long) floor((t - my x1) / my dx) + 1;
			long rightSample = leftSample + 1;
			long startSample, endSample;
			
		   for(long channel = 1; channel <= my ny; ++ channel){   //Compute the local mean; look one longest period to both sides.
			    startSample = rightSample - nsamp_period;
				endSample = leftSample + nsamp_period;
				if ( startSample < 0 ) {
				    std::cout<<"StartSample < 1"<<std::endl;
					std::cout<<"Sound_to_Pitch.cpp: Line 31"<<std::endl;
					return NULL;
				}
				
				if (endSample > my nx){
				    std::cout<<"EndSample > my nx"<<std::endl;
					std::cout<<"Sound_to_Pitch.cpp: Line 31.262"<<std::endl;
					return NULL;
				}
				
				localMean[channel] = 0.0;
				for (long i = startSample; i <= endSample; i ++) {    
					localMean[channel] += my z[channel][i];
				}
				localMean[channel] /= 2 * nsamp_period;
		
				// Copy a window to a frame and subtract the local mean. We are going to kill the DC component before windowing.	 
				startSample = rightSample - halfnsamp_window;
				endSample = leftSample + halfnsamp_window;
				
				if ( startSample < 1 ) {
				    std::cout<<"StartSample < 1"<<std::endl;
					std::cout<<"Sound_to_Pitch.cpp: Line 31.281"<<std::endl;
					return NULL;
				}
				
				if (endSample > my nx){
				    std::cout<<"EndSample > my nx"<<std::endl;
					std::cout<<"Sound_to_Pitch.cpp: Line 31.287"<<std::endl;
					return NULL;
				}
			
	           if (method < FCC_NORMAL) {
					for (long j = 1, i = startSample; j <= nsamp_window; j ++)
						frame [channel] [j] = (my z [channel] [i ++] - localMean [channel]) * window [j];
					for (long j = nsamp_window + 1; j <= nsampFFT; j ++)
						frame [channel] [j] = 0.0;
				} else {
					for (long j = 1, i = startSample; j <= nsamp_window; j ++)
						frame [channel] [j] = my z [channel] [i ++] - localMean [channel];
				}
			}
          
		// Compute the local peak; look half a longest period to both sides.
            localPeak = 0.0;
			if ((startSample = halfnsamp_window + 1 - halfnsamp_period) < 1) startSample = 1;
			if ((endSample = halfnsamp_window + halfnsamp_period) > nsamp_window) endSample = nsamp_window;
			for (long channel = 1; channel <= my ny; channel ++) {
				for (long j = startSample; j <= endSample; j ++) {
					double value = fabs (frame [channel] [j]);
					if (value > localPeak) localPeak = value;
				}
			}
			pitchFrame->intensity = localPeak > globalPeak ? 1.0 : localPeak / globalPeak;  		
		
			// Compute the correlation into the array 'r'.		
		if (method >= FCC_NORMAL) {
			double startTime = t - 0.5 * (1.0 / minimumPitch + dt_window);
			long localSpan = maximumLag + nsamp_window, localMaximumLag, offset;
			if ((startSample = (long) floor ((startTime - my x1) / my dx)) + 1 < 1)
				 startSample = 1;
			if (localSpan > my nx + 1 - startSample) localSpan = my nx + 1 - startSample;
			localMaximumLag = localSpan - nsamp_window;
			offset = startSample - 1;
			double sumx2 = 0;                          /* Sum of squares. */
			for (long channel = 1; channel <= my ny; channel ++) {                         ///channel = 1; channel <= my ny
				double *amp = my z[channel] + offset;
				for (long i = 1; i <= nsamp_window; i ++) {                               ///i = 1; i <= nsamp_window
					double x = amp[i] - localMean[channel]; 
					sumx2 += x * x;
				}
			}
			double sumy2 = sumx2;                      /* At zero lag, these are still equal. */
			r[0] = 1.0;
			for (long i = 1; i <= localMaximumLag; i ++) {
				double product = 0.0;
				for (long channel = 1; channel <= my ny; channel ++) {                   ///channel = 1; channel <= my ny
					double *amp = my z[channel] + offset;
					double y0 = amp[i] - localMean[channel];
					double yZ = amp[i + nsamp_window] - localMean[channel];
					sumy2 += yZ * yZ - y0 * y0;
					for (long j = 1; j <= nsamp_window; j ++) {                          ///j = 1; j <= nsamp_window
						double x = amp[j] - localMean[channel];
						double y = amp[i + j] - localMean[channel];
						product += x * y;
					}
				}
				r[- i] = r[i] = product / sqrt (sumx2 * sumy2);
			}
		} else {			
			// The FFT of the autocorrelation is the power spectrum.		
	            for (long i = 1; i <= nsampFFT; i ++) 
					ac [i] = 0.0;
				for (long channel = 1; channel <= my ny; channel ++) {
					NUMfft_forward (fftTable, frame [channel]);   /* Complex spectrum. */
					ac [1] += frame [channel] [1] * frame [channel] [1];   /* DC component. */
					for (long i = 2; i < nsampFFT; i += 2) {
						ac [i] += frame [channel] [i] * frame [channel] [i] + frame [channel] [i+1] * frame [channel] [i+1]; /* Power spectrum. */
					}
					ac [nsampFFT] += frame [channel] [nsampFFT] * frame [channel] [nsampFFT];   /* Nyquist frequency. */
				}
				NUMfft_backward (fftTable, ac);   /* Autocorrelation. */

				/*
				 * Normalize the autocorrelation to the value with zero lag,
				 * and divide it by the normalized autocorrelation of the window.
				 */
				r [0] = 1.0;
				for (long i = 1; i <= brent_ixmax; i ++)
					r [- i] = r [i] = ac [i + 1] / (ac [1] * windowR [i + 1]);
		}
			
		// Create (too much) space for candidates
		Pitch_Frame_init (pitchFrame, maxnCandidates);

	    // Register the first candidate, which is always present: voicelessness.
		pitchFrame->nCandidates = 1;
		pitchFrame->candidate[1].frequency = 0.0;    /* Voiceless: always present. */
		pitchFrame->candidate[1].strength = 0.0;

		/*
		 * Shortcut: absolute silence is always voiceless.
		 * Go to next frame.
		 */
		if (localPeak == 0) continue;

		/*
		 * Find the strongest maxima of the correlation of this frame, 
		 * and register them as candidates.
		 */
		imax[1] = 0;
		for (long i = 2; i < maximumLag && i < brent_ixmax; i ++)
		    if (r[i] > 0.5 * voicingThreshold &&       /* Not too unvoiced? */
				r[i] > r[i-1] && r[i] >= r[i+1])       /* Maximum ? */
		{
			int place = 0;
		   // Use parabolic interpolation for first estimate of frequency,and sin(x)/x interpolation to compute the strength of this frequency.
			double dr = 0.5 * (r[i+1] - r[i-1]);
			double d2r = 2 * r[i] - r[i-1] - r[i+1];
			double frequencyOfMaximum = 1 / my dx / (i + dr / d2r);
			long offset = - brent_ixmax - 1;
			double strengthOfMaximum = /* method & 1 ? */
				NUM_interpolate_sinc (& r[offset], brent_ixmax - offset, 1 / my dx / frequencyOfMaximum - offset, 30)
				/* : r [i] + 0.5 * dr * dr / d2r */;
			   /* High values due to short windows are to be reflected around 1. */
			if (strengthOfMaximum > 1.0) strengthOfMaximum = 1.0 / strengthOfMaximum;

			// Find a place for this maximum.
			if (pitchFrame->nCandidates < thy maxnCandidates) { /* Is there still a free place? */
				place = ++ pitchFrame->nCandidates;
			} else {
			   /* Try the place of the weakest candidate so far. */
				double weakest = 2;
				for (int iweak = 2; iweak <= thy maxnCandidates; iweak ++) {   //iweak = 2; iweak <= thy maxnCandidates;
					/* High frequencies are to be favoured */
					/* if we want to analyze a perfectly periodic signal correctly. */
					double localStrength = pitchFrame->candidate[iweak].strength - octaveCost *
						NUMlog2 (minimumPitch / pitchFrame->candidate[iweak].frequency);
					if (localStrength < weakest) { 
					     weakest = localStrength; 
						 place = iweak; 
				      }
				}
				/* If this maximum is weaker than the weakest candidate so far, give it no place. */
				if (strengthOfMaximum - octaveCost * NUMlog2 (minimumPitch / frequencyOfMaximum) <= weakest)
					place = 0;
			}
			if (place) {              /* Have we found a place for this candidate? */
				pitchFrame->candidate[place].frequency = frequencyOfMaximum;
				pitchFrame->candidate[place].strength = strengthOfMaximum;
				imax [place] = i;
			}
		}
		
		// Second pass: for extra precision, maximize sin(x)/x interpolation ('sinc').
		for (long i = 2; i <= pitchFrame->nCandidates; i ++) { 
			if (method != AC_HANNING || pitchFrame->candidate[i].frequency > 0.0 / my dx) {
				double xmid, ymid;
				long offset = - brent_ixmax - 1;
				ymid = NUMimproveMaximum (& r[offset], brent_ixmax - offset, imax[i] - offset,
					pitchFrame->candidate[i].frequency > 0.3 / my dx ? NUM_PEAK_INTERPOLATE_SINC700 : brent_depth, & xmid);
				xmid += offset;
				pitchFrame->candidate[i].frequency = 1.0 / my dx / xmid;
				if (ymid > 1.0) ymid = 1.0 / ymid;
				pitchFrame->candidate[i].strength = ymid;
			}
		}
	}   /* Next frame. */
	
       Pitch_pathFinder (thee, silenceThreshold, voicingThreshold,octaveCost, octaveJumpCost,
			             voicedUnvoicedCost, ceiling, false);   
					   //false:  Melder_debug == 31 ? true : false   Melder_debug 31: Pitch analysis: formant pulling on
	return thee; 
}
Пример #11
0
/*
	SPINET_to_Pitch: build a Pitch object from a SPINET by weighted harmonic summation.

	me                    the SPINET that is analysed; its rows are converted from ERB to
	                      hertz with NUMerbToHertz and its columns become the Pitch frames.
	harmonicFallOffSlope  harmonic m is weighted by 1 - harmonicFallOffSlope * log2 (m).
	ceiling               handed to Pitch_create; must be greater than the frequency of the first row.
	maxnCandidates        handed to Pitch_create, Pitch_Frame_init and Pitch_Frame_addPitch.

	Returns the new Pitch. If the frequency range is too small, the ceiling is too low,
	or an error is pending at the end, the result of Melder_errorp1 is returned instead
	(presumably NULL -- confirm against the Melder declarations).
	If the SPINET contains no power at all, the Pitch is returned exactly as Pitch_create made it.
*/
Pitch SPINET_to_Pitch (SPINET me, double harmonicFallOffSlope, double ceiling, int maxnCandidates)
{
	const long nPointsPerOctave = 48;
	const double unvoicedCriterium = 0.45;
	Pitch thee = NULL;
	double *power = NULL, *pitch = NULL, *sumspec = NULL;
	double *y = NULL, *y2 = NULL, *fl2 = NULL;
	double maxPower = 0, maxStrength = 0;

	/* The log2-frequency grid runs from the first to the last row of the SPINET. */
	double fmin = NUMerbToHertz (Sampled2_rowToY (me, 1));
	double fmax = NUMerbToHertz (Sampled2_rowToY (me, my ny));
	double fminl2 = NUMlog2 (fmin);
	double fmaxl2 = NUMlog2 (fmax);
	double octaveSpan = fmaxl2 - fminl2;
	double points = octaveSpan * nPointsPerOctave;
	double dfl2 = octaveSpan / (points - 1);
	long nFrequencyPoints = (long) points;
	long maxHarmonic = (long) (fmax / fmin);

	/* Nothing has been allocated yet, so these two checks may return directly. */
	if (nFrequencyPoints < 2)
		return Melder_errorp1 (L"SPINET_to_Pitch: frequency range too small.");
	if (ceiling <= fmin)
		return Melder_errorp1 (L"SPINET_to_Pitch: ceiling is smaller than centre "
			"frequency of lowest filter.");

	/* Allocate the result and the work vectors; the first failure goes to the clean-up. */
	if (! (thee = Pitch_create (my xmin, my xmax, my nx, my dx, my x1, ceiling, maxnCandidates))) goto cleanup;
	if (! (power = NUMdvector (1, my nx))) goto cleanup;
	if (! (pitch = NUMdvector (1, nFrequencyPoints))) goto cleanup;
	if (! (sumspec = NUMdvector (1, nFrequencyPoints))) goto cleanup;
	if (! (y = NUMdvector (1, my ny))) goto cleanup;
	if (! (y2 = NUMdvector (1, my ny))) goto cleanup;
	if (! (fl2 = NUMdvector (1, my ny))) goto cleanup;

	/* Row positions: from ERB to log2 (hertz). */
	for (long irow = 1; irow <= my ny; irow ++)
	{
		double hertz = NUMerbToHertz (my y1 + (irow - 1) * my dy);
		fl2[irow] = NUMlog2 (hertz);
	}

	/* Power per frame (sum over all rows) and the largest of these powers. */
	for (long iframe = 1; iframe <= my nx; iframe ++)
	{
		double total = 0;
		for (long irow = 1; irow <= my ny; irow ++) total += my s[irow][iframe];
		power[iframe] = total;
		if (total > maxPower) maxPower = total;
	}
	if (maxPower == 0) goto cleanup;

	for (long iframe = 1; iframe <= my nx; iframe ++)
	{
		Pitch_Frame pitchFrame = & thy frame[iframe];
		pitchFrame->intensity = power[iframe] / maxPower;

		/* Interpolate this frame's column onto the regular log2-frequency grid. */
		for (long irow = 1; irow <= my ny; irow ++) y[irow] = my s[irow][iframe];
		if (! NUMspline (fl2, y, my ny, 1e30, 1e30, y2)) goto cleanup;
		for (long k = 1; k <= nFrequencyPoints; k ++)
		{
			sumspec[k] = 0;
			NUMsplint (fl2, y, y2, my ny, fminl2 + (k - 1) * dfl2, & pitch[k]);
		}

		/*
			Formula (8): weighted harmonic summation.
			Harmonic m lies kb - 1 grid points above its fundamental.
		*/
		for (long m = 1; m <= maxHarmonic; m ++)
		{
			double log2m = NUMlog2 (m);
			double hm = 1 - harmonicFallOffSlope * log2m;
			long kb = (long) (1 + floor (nPointsPerOctave * log2m));
			for (long target = 1, source = kb; source <= nFrequencyPoints; target ++, source ++)
			{
				if (pitch[source] > 0) sumspec[target] += pitch[source] * hm;
			}
		}

		/* Into the Pitch frame: the unvoiced candidate first, then every local maximum. */
		if (! Pitch_Frame_init (pitchFrame, maxnCandidates)) goto cleanup;
		pitchFrame->nCandidates = 0; /* !!!!! */
		Pitch_Frame_addPitch (pitchFrame, 0, 0, maxnCandidates); /* unvoiced */

		for (long k = 2; k < nFrequencyPoints; k ++)
		{
			double below = sumspec[k - 1], centre = sumspec[k], above = sumspec[k + 1];
			if (! (centre > below && centre >= above)) continue;   /* not a local maximum */

			/* Parabolic interpolation through the three points around the maximum. */
			double denum = below - 2 * centre + above;
			double tmp = above - 4 * centre;
			double shift = dfl2 * (below - above) / (2 * denum);
			double frequency = pow (2, fminl2 + (k - 1) * dfl2 + shift);
			double strength = (2 * below * (4 * centre + above) - below * below - tmp * tmp) / (8 * denum);
			if (strength > maxStrength) maxStrength = strength;
			Pitch_Frame_addPitch (pitchFrame, frequency, strength, maxnCandidates);
		}
	}

	/* Scale the pitch strengths relative to the largest strength found in any frame. */
	for (long iframe = 1; iframe <= my nx; iframe ++)
	{
		double f0, localStrength;
		Pitch_Frame_getPitch (& thy frame[iframe], & f0, & localStrength);
		Pitch_Frame_resizeStrengths (& thy frame[iframe], localStrength / maxStrength, unvoicedCriterium);
	}

cleanup:
	NUMdvector_free (pitch, 1);
	NUMdvector_free (sumspec, 1);
	NUMdvector_free (y, 1);
	NUMdvector_free (y2, 1);
	NUMdvector_free (fl2, 1);
	NUMdvector_free (power, 1);
	if (Melder_hasError ())
	{
		forget (thee);
		return Melder_errorp1 (L"SPINET_to_Pitch: not performed.");
	}
	return thee;
}
Пример #12
0
/*
	Sound_to_Pitch_shs: pitch analysis by subharmonic summation.

	me                 the Sound to analyse; it is first resampled to 2 * maximumFrequency.
	timeStep           frame spacing, handed to Sampled_shortTermAnalysis and Pitch_create.
	minimumPitch       sets the analysis window (2 / minimumPitch) and the lower edge of the log2-frequency grid.
	maximumFrequency   upper edge of the log2-frequency grid.
	ceiling            handed to Pitch_create.
	maxnSubharmonics   the summation adds maxnSubharmonics + 1 shifted copies of the spectrum.
	maxnCandidates     handed to Pitch_create, Pitch_Frame_init and Pitch_Frame_addPitch.
	compressionFactor  each successive shifted copy is weighted by one more factor of compressionFactor.
	nPointsPerOctave   resolution of the log2-frequency grid.

	Returns the new Pitch (ownership is transferred to the caller).
	Throws a MelderError if the range from minimumPitch to maximumFrequency covers fewer than
	two grid points, or if any of the called routines throws.
*/
Pitch Sound_to_Pitch_shs (Sound me, double timeStep, double minimumPitch,
                          double maximumFrequency, double ceiling, long maxnSubharmonics, long maxnCandidates,
                          double compressionFactor, long nPointsPerOctave) {
	try {
		double firstTime, newSamplingFrequency = 2 * maximumFrequency;
		double windowDuration = 2 / minimumPitch, halfWindow = windowDuration / 2;
		double atans = nPointsPerOctave * NUMlog2 (65.0 / 50.0) - 1;
		// Number of speech samples in the downsampled signal in each frame:
		// 100 for windowDuration == 0.04 and newSamplingFrequency == 2500
		long nx = lround (windowDuration * newSamplingFrequency);

		// The minimum number of points for the fft is 256.
		long nfft = 1;
		while ( (nfft *= 2) < nx || nfft <= 128) {
			;
		}
		long nfft2 = nfft / 2 + 1;
		double frameDuration = nfft / newSamplingFrequency;
		double df = newSamplingFrequency / nfft;

		// The number of points on the octave scale

		double fminl2 = NUMlog2 (minimumPitch), fmaxl2 = NUMlog2 (maximumFrequency);
		long nFrequencyPoints = (long) floor ((fmaxl2 - fminl2) * nPointsPerOctave);

		// With fewer than two points the grid step below would be a division by zero
		// (or by a negative number) and the work vectors would be empty.

		if (nFrequencyPoints < 2) {
			Melder_throw (U"Frequency range too small.");
		}
		double dfl2 = (fmaxl2 - fminl2) / (nFrequencyPoints - 1);

		autoSound sound = Sound_resample (me, newSamplingFrequency, 50);
		long numberOfFrames;
		Sampled_shortTermAnalysis (sound.peek(), windowDuration, timeStep, &numberOfFrames, &firstTime);
		autoSound frame = Sound_createSimple (1, frameDuration, newSamplingFrequency);
		autoSound hamming = Sound_createHamming (nx / newSamplingFrequency, newSamplingFrequency);
		autoPitch thee = Pitch_create (my xmin, my xmax, numberOfFrames, timeStep, firstTime,
		                               ceiling, maxnCandidates);
		autoNUMvector<double> cc (1, numberOfFrames);
		autoNUMvector<double> specAmp (1, nfft2);
		autoNUMvector<double> fl2 (1, nfft2);
		autoNUMvector<double> yv2 (1, nfft2);
		autoNUMvector<double> arctg (1, nFrequencyPoints);
		autoNUMvector<double> al2 (1, nFrequencyPoints);

		Melder_assert (frame->nx >= nx);
		Melder_assert (hamming->nx == nx);

		// Compute the absolute value of the globally largest amplitude w.r.t. the global mean.

		double globalMean, globalPeak;
		Sound_localMean (sound.peek(), sound -> xmin, sound -> xmax, &globalMean);
		Sound_localPeak (sound.peek(), sound -> xmin, sound -> xmax, globalMean, &globalPeak);

		/*
			For the cubic spline interpolation we need the frequencies on an octave
			scale, i.e., a log2 scale. All frequencies must be DIFFERENT, otherwise
			the cubic spline interpolation will give corrupt results.
			Because log2(f==0) is not defined, we use the heuristic: f[2]-f[1] == f[3]-f[2].
		*/

		for (long i = 2; i <= nfft2; i++) {
			fl2[i] = NUMlog2 ( (i - 1) * df);
		}
		fl2[1] = 2 * fl2[2] - fl2[3];

		// Calculate frequencies regularly spaced on a log2-scale and
		// the frequency weighting function.

		for (long i = 1; i <= nFrequencyPoints; i++) {
			arctg[i] = 0.5 + atan (3 * (i - atans) / nPointsPerOctave) / NUMpi;
		}

		// Perform the analysis on all frames.

		for (long i = 1; i <= numberOfFrames; i++) {
			Pitch_Frame pitchFrame = &thy frame[i];
			double hm = 1, f0, pitch_strength, localMean, localPeak;
			double tmid = Sampled_indexToX (thee.peek(), i); /* The center of this frame */
			long nx_tmp = frame -> nx;

			// Copy a frame from the sound, apply a hamming window. Get local 'intensity'.
			// The frame is temporarily shortened to nx samples so that it matches the
			// Hamming window; its real length is restored before the Fourier transform.

			frame -> nx = nx; /* begin dirty hack */
			Sound_into_Sound (sound.peek(), frame.peek(), tmid - halfWindow);
			Sounds_multiply (frame.peek(), hamming.peek());
			Sound_localMean (sound.peek(), tmid - 3 * halfWindow, tmid + 3 * halfWindow, &localMean);
			Sound_localPeak (sound.peek(), tmid - halfWindow, tmid + halfWindow, localMean, &localPeak);
			pitchFrame -> intensity = localPeak > globalPeak ? 1 : localPeak / globalPeak;
			frame -> nx = nx_tmp; /* end dirty hack */

			// Get the Fourier spectrum.

			autoSpectrum spec = Sound_to_Spectrum (frame.peek(), 1);
			Melder_assert (spec->nx == nfft2);

			// From complex spectrum to amplitude spectrum.

			for (long j = 1; j <= nfft2; j++) {
				double rs = spec -> z[1][j], is = spec -> z[2][j];
				specAmp[j] = sqrt (rs * rs + is * is);
			}

			// Enhance the peaks in the spectrum.

			spec_enhance_SHS (specAmp.peek(), nfft2);

			// Smooth the enhanced spectrum.

			spec_smoooth_SHS (specAmp.peek(), nfft2);

			// Go to a logarithmic scale and perform cubic spline interpolation to get
			// spectral values for the increased number of frequency points.

			NUMspline (fl2.peek(), specAmp.peek(), nfft2, 1e30, 1e30, yv2.peek());
			for (long j = 1; j <= nFrequencyPoints; j++) {
				double f = fminl2 + (j - 1) * dfl2;
				NUMsplint (fl2.peek(), specAmp.peek(), yv2.peek(), nfft2, f, &al2[j]);
			}

			// Multiply by frequency selectivity of the auditory system.
			// Negative interpolated values are clipped to zero.

			for (long j = 1; j <= nFrequencyPoints; j++) al2[j] = al2[j] > 0 ?
				        al2[j] * arctg[j] : 0;

			// The subharmonic summation. Shift spectra in octaves and sum.
			// NOTE(review): the accumulation below relies on a freshly created
			// autoNUMvector starting out as all zeroes -- confirm.

			Pitch_Frame_init (pitchFrame, maxnCandidates);
			autoNUMvector<double> sumspec (1, nFrequencyPoints);
			pitchFrame -> nCandidates = 0; /* !!!!! */

			for (long m = 1; m <= maxnSubharmonics + 1; m++) {
				long kb = 1 + (long) floor (nPointsPerOctave * NUMlog2 (m));
				for (long k = kb; k <= nFrequencyPoints; k++) {
					sumspec[k - kb + 1] += al2[k] * hm;
				}
				hm *= compressionFactor;
			}

			// First register the voiceless candidate (always present).

			Pitch_Frame_addPitch (pitchFrame, 0, 0, maxnCandidates);

			/*
				Get the best local estimates for the pitch as the maxima of the
				subharmonic sum spectrum by parabolic interpolation on three points:
				The formula for a parabola with a maximum is:
					y(x) = a - b (x - c)^2 with a, b, c >= 0
				The three points are (-x, y1), (0, y2) and (x, y3).
				The solution for a (the maximum) and c (the position) is:
				a = (2 y1 (4 y2 + y3) - y1^2 - (y3 - 4 y2)^2) / (8 (y1 - 2 y2 + y3))
				c = dx (y1 - y3) / (2 (y1 - 2 y2 + y3))
				(b = (2 y2 - y1 - y3) / (2 dx^2) )
			*/

			for (long k = 2; k <= nFrequencyPoints - 1; k++) {
				double y1 = sumspec[k - 1], y2 = sumspec[k], y3 = sumspec[k + 1];
				if (y2 > y1 && y2 >= y3) {
					double denum = y1 - 2 * y2 + y3, tmp = y3 - 4 * y2;
					double x =  dfl2 * (y1 - y3) / (2 * denum);
					double f = pow (2, fminl2 + (k - 1) * dfl2 + x);
					double strength = (2 * y1 * (4 * y2 + y3) - y1 * y1 - tmp * tmp) / (8 * denum);
					Pitch_Frame_addPitch (pitchFrame, f, strength, maxnCandidates);
				}
			}

			/*
				Check whether f0 corresponds to an actual periodicity T = 1 / f0:
				correlate two signal periods of duration T, one starting at the
				middle of the interval and one starting T seconds before.
				If there is periodicity the correlation coefficient should be high.

				However, some sounds do not show any regularity, or very low
				frequency and regularity, and nevertheless have a definite
				pitch, e.g. Shepard sounds.
			*/

			Pitch_Frame_getPitch (pitchFrame, &f0, &pitch_strength);
			if (f0 > 0) {
				cc[i] = Sound_correlateParts (sound.peek(), tmid - 1.0 / f0, tmid, 1.0 / f0);
			}
		}

		// Base V/UV decision on correlation coefficients.
		// Resize the pitch strengths w.r.t. the cc.

		double vuvCriterium = 0.52;
		for (long i = 1; i <= numberOfFrames; i++) {
			Pitch_Frame_resizeStrengths (& thy frame[i], cc[i], vuvCriterium);
		}
		return thee.transfer();
	} catch (MelderError) {
		Melder_throw (me, U": no Pitch (shs) created.");
	}
}