/*
 * Computes the entropy measures (in bits) of a contingency table of non-negative cell values.
 *
 * The column totals define the "x" distribution and the row totals the "y" distribution.
 *
 * Outputs (each pointer may be null if the caller does not need that value,
 * which matches the convention of Confusion_getEntropies):
 *   h     total (joint) entropy of the table
 *   hx    entropy of the x distribution (column totals)
 *   hy    entropy of the y distribution (row totals)
 *   hygx  conditional entropy of y given x, h - hx
 *   hxgy  conditional entropy of x given y, h - hy
 *   uygx  (hy - hygx) / (hy + TINY)
 *   uxgy  (hx - hxgy) / (hx + TINY)
 *   uxy   2 * (hx + hy - h) / (hx + hy + TINY)
 *
 * Cells and totals that are not positive contribute nothing, so an all-zero table yields zeroes.
 */
void ContingencyTable_entropies (ContingencyTable me, double *h, double *hx, double *hy, double *hygx, double *hxgy, double *uygx, double *uxgy, double *uxy) {
	// Zero the requested outputs before doing anything else, as the previous implementation did.
	double *requested [] = { h, hx, hy, hygx, hxgy, uygx, uxgy, uxy };
	for (double *output : requested) {
		if (output) {
			*output = 0.0;
		}
	}

	// The accumulations below rely on both vectors starting out as zeroes.
	autoNUMvector<double> rowsum (1, my numberOfRows);
	autoNUMvector<double> colsum (1, my numberOfColumns);

	// Row and column totals.
	double sum = 0.0;
	for (long i = 1; i <= my numberOfRows; i++) {
		for (long j = 1; j <= my numberOfColumns; j++) {
			rowsum[i] += my data[i][j];
			colsum[j] += my data[i][j];
		}
		sum += rowsum[i];
	}

	// Entropy of the x distribution.
	double entropyX = 0.0;
	for (long j = 1; j <= my numberOfColumns; j++) {
		if (colsum[j] > 0.0) {
			double p = colsum[j] / sum;
			entropyX -= p * NUMlog2 (p);
		}
	}

	// Entropy of the y distribution.
	double entropyY = 0.0;
	for (long i = 1; i <= my numberOfRows; i++) {
		if (rowsum[i] > 0.0) {
			double p = rowsum[i] / sum;
			entropyY -= p * NUMlog2 (p);
		}
	}

	// Total entropy.
	double entropyTotal = 0.0;
	for (long i = 1; i <= my numberOfRows; i++) {
		for (long j = 1; j <= my numberOfColumns; j++) {
			if (my data[i][j] > 0.0) {
				double p = my data[i][j] / sum;
				entropyTotal -= p * NUMlog2 (p);
			}
		}
	}

	// Conditional entropies and the derived ratios; TINY keeps the denominators away from zero.
	double entropyYgivenX = entropyTotal - entropyX;
	double entropyXgivenY = entropyTotal - entropyY;
	double dependencyYonX = (entropyY - entropyYgivenX) / (entropyY + TINY);
	double dependencyXonY = (entropyX - entropyXgivenY) / (entropyX + TINY);
	double symmetricDependency = 2.0 * (entropyX + entropyY - entropyTotal) / (entropyX + entropyY + TINY);

	if (h) {
		*h = entropyTotal;
	}
	if (hx) {
		*hx = entropyX;
	}
	if (hy) {
		*hy = entropyY;
	}
	if (hygx) {
		*hygx = entropyYgivenX;
	}
	if (hxgy) {
		*hxgy = entropyXgivenY;
	}
	if (uygx) {
		*uygx = dependencyYonX;
	}
	if (uxgy) {
		*uxgy = dependencyXonY;
	}
	if (uxy) {
		*uxy = symmetricDependency;
	}
}
double structIntensity :: v_convertSpecialToStandardUnit (double value, long ilevel, int unit) { (void) ilevel; return unit == 1 ? 10.0 * log10 (value) : // value = energy unit == 2 ? 10.0 * NUMlog2 (value) : // value = sones value; // value = dB }
/*
 * Converts a value expressed in one of the special units to the standard unit:
 * an energy (unit 1) becomes 10 * log10 (value), a value in sones (unit 2)
 * becomes 10 * log2 (value), and anything else is taken to be dB already.
 * Neither the object nor the level is used.
 */
static double convertSpecialToStandardUnit (I, double value, long ilevel, int unit) {
	(void) void_me;
	(void) ilevel;
	if (unit == 1) {
		return 10.0 * log10 (value);   /* value = energy */
	}
	if (unit == 2) {
		return 10.0 * NUMlog2 (value);   /* value = sones */
	}
	return value;   /* value = dB */
}
static double getTransitionCost (long iframe, long icand1, long icand2, int itrack, void *closure) { struct fparm *me = (struct fparm *) closure; Formant_Frame prevFrame = & my my d_frames [iframe - 1], curFrame = & my my d_frames [iframe]; double f1, f2; (void) itrack; if (icand1 > prevFrame -> nFormants || icand2 > curFrame -> nFormants) return 1e30; f1 = prevFrame -> formant [icand1]. frequency; f2 = curFrame -> formant [icand2]. frequency; /*Melder_assert (f1 > 0.0);*/ /*Melder_assert (f2 > 0.0);*/ return my octaveJumpCost * fabs (NUMlog2 (f1 / f2)); }
/*
 * Returns the entropy (in bits) of the distribution of strings in a Categories.
 *
 * Works on a sorted copy, so that equal strings are adjacent: each run of equal
 * strings contributes -p * log2 (p), where p is the run length divided by the
 * total number of items. An empty Categories yields 0.
 */
double Categories_getEntropy (Categories me) {
	autoCategories sorted = Data_copy (me);
	Categories_sort (sorted.get());

	double entropy = 0.0;
	long runLength = 0;
	char32 *runLabel = nullptr;   // string of the previous item; null before the first item
	for (long index = 1; index <= sorted -> size; index ++) {
		SimpleString item = sorted -> at [index];
		char32 *label = item -> string;
		const bool startsNewRun = runLabel && ! str32equ (label, runLabel);
		if (startsNewRun) {
			// Close the run that just ended.
			double p = (double) runLength / sorted -> size;
			entropy -= p * NUMlog2 (p);
			runLength = 0;
		}
		runLength ++;
		runLabel = label;
	}

	// Close the last run, if there was any item at all.
	if (runLength != 0) {
		double p = (double) runLength / sorted -> size;
		entropy -= p * NUMlog2 (p);
	}
	return entropy;
}
/*
 * Creates a Pitch object from a SPINET by weighted harmonic summation on a log2-frequency axis.
 *
 * harmonicFallOffSlope: the weight of harmonic m is 1 - harmonicFallOffSlope * log2 (m).
 * ceiling: passed on to Pitch_create; must be greater than the frequency of the first SPINET row.
 * maxnCandidates: passed on to Pitch_create, Pitch_Frame_init and Pitch_Frame_addPitch.
 *
 * Throws if the frequency range spans fewer than 2 points (at 48 points per octave),
 * if the ceiling is not above the lowest frequency, or if all frames have zero power.
 * Any error is rethrown with ": no Pitch created." appended.
 */
autoPitch SPINET_to_Pitch (SPINET me, double harmonicFallOffSlope, double ceiling, int maxnCandidates) {
	try {
		long nPointsPerOctave = 48;
		// Frequencies (Hz) of the first and last row, and their base-2 logarithms.
		double fmin = NUMerbToHertz (Sampled2_rowToY (me, 1));
		double fmax = NUMerbToHertz (Sampled2_rowToY (me, my ny));
		double fminl2 = NUMlog2 (fmin), fmaxl2 = NUMlog2 (fmax);
		// Number of points on the log2-frequency axis (fractional) and the step between them.
		double points = (fmaxl2 - fminl2) * nPointsPerOctave;
		double dfl2 = (fmaxl2 - fminl2) / (points - 1);
		long nFrequencyPoints = (long) floor (points);
		long maxHarmonic = (long) floor (fmax / fmin);
		double maxStrength = 0.0, unvoicedCriterium = 0.45, maxPower = 0.0;

		if (nFrequencyPoints < 2) {
			Melder_throw (U"Frequency range too small.");
		}
		if (ceiling <= fmin) {
			Melder_throw (U"Ceiling is smaller than centre frequency of lowest filter.");
		}

		autoPitch thee = Pitch_create (my xmin, my xmax, my nx, my dx, my x1, ceiling, maxnCandidates);
		autoNUMvector<double> power (1, my nx);
		autoNUMvector<double> pitch (1, nFrequencyPoints);
		autoNUMvector<double> sumspec (1, nFrequencyPoints);
		autoNUMvector<double> y (1, my ny);
		autoNUMvector<double> yv2 (1, my ny);
		autoNUMvector<double> fl2 (1, my ny);

		// From ERB's to log (f)
		for (long i = 1; i <= my ny; i++) {
			double f = NUMerbToHertz (my y1 + (i - 1) * my dy);
			fl2[i] = NUMlog2 (f);
		}

		// Determine global maximum power in frame
		for (long j = 1; j <= my nx; j++) {
			double p = 0.0;
			for (long i = 1; i <= my ny; i++) {
				p += my s[i][j];
			}
			if (p > maxPower) {
				maxPower = p;
			}
			power[j] = p;
		}
		if (maxPower == 0.0) {
			Melder_throw (U"No power");
		}

		for (long j = 1; j <= my nx; j++) {
			Pitch_Frame pitchFrame = &thy frame[j];
			// Frame intensity is the frame's summed power relative to the strongest frame.
			pitchFrame -> intensity = power[j] / maxPower;
			for (long i = 1; i <= my ny; i++) {
				y[i] = my s[i][j];
			}
			// Resample column j from the row frequencies (fl2) onto the regular log2-frequency grid.
			NUMspline (fl2.peek(), y.peek(), my ny, 1e30, 1e30, yv2.peek());
			for (long k = 1; k <= nFrequencyPoints; k++) {
				double f = fminl2 + (k - 1) * dfl2;
				NUMsplint (fl2.peek(), y.peek(), yv2.peek(), my ny, f, & pitch[k]);
				sumspec[k] = 0.0;
			}

			// Formula (8): weighted harmonic summation.
			// Harmonic m lies kb - 1 grid points above its fundamental, so pitch[k] is added
			// (with weight hm, and only if positive) to the sum at grid point k - kb + 1.
			for (long m = 1; m <= maxHarmonic; m++) {
				double hm = 1 - harmonicFallOffSlope * NUMlog2 (m);
				long kb = 1 + (long) floor (nPointsPerOctave * NUMlog2 (m));
				for (long k = kb; k <= nFrequencyPoints; k++) {
					if (pitch[k] > 0.0) {
						sumspec[k - kb + 1] += pitch[k] * hm;
					}
				}
			}

			// into Pitch object
			Pitch_Frame_init (pitchFrame, maxnCandidates);
			pitchFrame -> nCandidates = 0; /* !!!!! */
			Pitch_Frame_addPitch (pitchFrame, 0, 0, maxnCandidates); /* unvoiced */
			// Every local maximum of the summed spectrum becomes a candidate; its position and
			// strength are refined from the three samples around the maximum.
			for (long k = 2; k <= nFrequencyPoints - 1; k++) {
				double y1 = sumspec[k - 1], y2 = sumspec[k], y3 = sumspec[k + 1];
				if (y2 > y1 && y2 >= y3) {
					// denum is strictly negative here, because y2 > y1 and y2 >= y3.
					double denum = y1 - 2.0 * y2 + y3, tmp = y3 - 4.0 * y2;
					double x = dfl2 * (y1 - y3) / (2 * denum);
					double f = pow (2.0, fminl2 + (k - 1) * dfl2 + x);
					double strength = (2.0 * y1 * (4.0 * y2 + y3) - y1 * y1 - tmp * tmp) / (8.0 * denum);
					if (strength > maxStrength) {
						maxStrength = strength;
					}
					Pitch_Frame_addPitch (pitchFrame, f, strength, maxnCandidates);
				}
			}
		}

		// Scale the pitch strengths
		// NOTE(review): maxStrength is still 0.0 if no frame produced a maximum with positive
		// strength; the division below is then a division by zero -- confirm whether
		// Pitch_Frame_resizeStrengths tolerates that.
		for (long j = 1; j <= my nx; j++) {
			double f0, localStrength;
			Pitch_Frame_getPitch (&thy frame[j], &f0, &localStrength);
			Pitch_Frame_resizeStrengths (&thy frame[j], localStrength / maxStrength, unvoicedCriterium);
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no Pitch created.");
	}
}
/*
 * Computes the entropy measures (in bits) of a Confusion table.
 *
 * Column totals define the "x" distribution, row totals the "y" distribution.
 * Every output pointer is optional; a null pointer means the value is not wanted.
 *   p_h     joint entropy of all cells
 *   p_hx    entropy of the column totals
 *   p_hy    entropy of the row totals
 *   p_hygx  h - hx
 *   p_hxgy  h - hy
 *   p_uygx  (hy - hygx) / (hy + TINY)
 *   p_uxgy  (hx - hxgy) / (hx + TINY)
 *   p_uxy   2 * (hx + hy - h) / (hx + hy + TINY)
 */
void Confusion_getEntropies (Confusion me, double *p_h, double *p_hx, double *p_hy, double *p_hygx, double *p_hxgy, double *p_uygx, double *p_uxgy, double *p_uxy) {
	// Both vectors are expected to start out as zeroes.
	autoNUMvector<double> rowTotals (1, my numberOfRows);
	autoNUMvector<double> columnTotals (1, my numberOfColumns);

	// Marginal totals and the grand total, accumulated cell by cell.
	double grandTotal = 0.0;
	for (long irow = 1; irow <= my numberOfRows; irow++) {
		for (long icol = 1; icol <= my numberOfColumns; icol++) {
			rowTotals[irow] += my data[irow][icol];
			columnTotals[icol] += my data[irow][icol];
			grandTotal += my data[irow][icol];
		}
	}

	// Entropy of the row totals.
	double entropyY = 0.0;
	for (long irow = 1; irow <= my numberOfRows; irow++) {
		if (rowTotals[irow] > 0.0) {
			double p = rowTotals[irow] / grandTotal;
			entropyY -= p * NUMlog2 (p);
		}
	}

	// Entropy of the column totals.
	double entropyX = 0.0;
	for (long icol = 1; icol <= my numberOfColumns; icol++) {
		if (columnTotals[icol] > 0.0) {
			double p = columnTotals[icol] / grandTotal;
			entropyX -= p * NUMlog2 (p);
		}
	}

	// Joint entropy over all positive cells.
	double entropyJoint = 0.0;
	for (long irow = 1; irow <= my numberOfRows; irow++) {
		for (long icol = 1; icol <= my numberOfColumns; icol++) {
			if (my data[irow][icol] > 0.0) {
				double p = my data[irow][icol] / grandTotal;
				entropyJoint -= p * NUMlog2 (p);
			}
		}
	}

	// Derived measures; TINY keeps the denominators away from zero.
	double entropyYgivenX = entropyJoint - entropyX;
	double entropyXgivenY = entropyJoint - entropyY;
	double dependencyYonX = (entropyY - entropyYgivenX) / (entropyY + TINY);
	double dependencyXonY = (entropyX - entropyXgivenY) / (entropyX + TINY);
	double symmetricDependency = 2.0 * (entropyX + entropyY - entropyJoint) / (entropyX + entropyY + TINY);

	// Hand back only what the caller asked for.
	if (p_h) {
		*p_h = entropyJoint;
	}
	if (p_hx) {
		*p_hx = entropyX;
	}
	if (p_hy) {
		*p_hy = entropyY;
	}
	if (p_hygx) {
		*p_hygx = entropyYgivenX;
	}
	if (p_hxgy) {
		*p_hxgy = entropyXgivenY;
	}
	if (p_uygx) {
		*p_uygx = dependencyYonX;
	}
	if (p_uxgy) {
		*p_uxgy = dependencyXonY;
	}
	if (p_uxy) {
		*p_uxy = symmetricDependency;
	}
}
/*
 * Analyses the sound around time t and fills one pitch frame with candidates.
 *
 * Steps, in order:
 *   1. per channel: compute the local mean and copy the (mean-subtracted) window into 'frame';
 *   2. compute the local peak and store the frame intensity relative to globalPeak;
 *   3. compute the normalized correlation into r [-lag .. +lag], either directly in the time
 *      domain (method >= FCC_NORMAL) or via the FFT (method < FCC_NORMAL);
 *   4. register the voiceless candidate, then every sufficiently strong local maximum of r,
 *      replacing the weakest candidate when the frame is full;
 *   5. refine frequency and strength of the voiced candidates with sinc interpolation.
 *
 * Scratch buffers supplied by the caller (all indexed from 1 unless stated otherwise):
 *   frame [1..ny] [..], ac, window, windowR, imax, localMean [1..ny];
 *   r is indexed symmetrically around 0 and must be addressable from
 *   - brent_ixmax to + brent_ixmax.
 * In the FFT branch 'window', 'windowR', 'ac' and 'fftTable' are used; in the
 * cross-correlation branch they are not touched.
 */
static void Sound_into_PitchFrame (Sound me, Pitch_Frame pitchFrame, double t,
	double minimumPitch, int maxnCandidates, int method, double voicingThreshold, double octaveCost,
	NUMfft_Table fftTable, double dt_window, long nsamp_window, long halfnsamp_window,
	long maximumLag, long nsampFFT, long nsamp_period, long halfnsamp_period,
	long brent_ixmax, long brent_depth, double globalPeak,
	double **frame, double *ac, double *window, double *windowR,
	double *r, long *imax, double *localMean)
{
	double localPeak;
	// The two samples that straddle time t.
	long leftSample = Sampled_xToLowIndex (me, t), rightSample = leftSample + 1;
	long startSample, endSample;

	for (long channel = 1; channel <= my ny; channel ++) {
		/*
		 * Compute the local mean; look one longest period to both sides.
		 */
		startSample = rightSample - nsamp_period;
		endSample = leftSample + nsamp_period;
		Melder_assert (startSample >= 1);
		Melder_assert (endSample <= my nx);
		localMean [channel] = 0.0;
		for (long i = startSample; i <= endSample; i ++) {
			localMean [channel] += my z [channel] [i];
		}
		// The range startSample..endSample contains exactly 2 * nsamp_period samples.
		localMean [channel] /= 2 * nsamp_period;

		/*
		 * Copy a window to a frame and subtract the local mean.
		 * We are going to kill the DC component before windowing.
		 */
		startSample = rightSample - halfnsamp_window;
		endSample = leftSample + halfnsamp_window;
		Melder_assert (startSample >= 1);
		Melder_assert (endSample <= my nx);
		if (method < FCC_NORMAL) {
			// Autocorrelation: apply the window function and zero-pad up to the FFT size.
			for (long j = 1, i = startSample; j <= nsamp_window; j ++)
				frame [channel] [j] = (my z [channel] [i ++] - localMean [channel]) * window [j];
			for (long j = nsamp_window + 1; j <= nsampFFT; j ++)
				frame [channel] [j] = 0.0;
		} else {
			// Cross-correlation: no window function, no padding.
			for (long j = 1, i = startSample; j <= nsamp_window; j ++)
				frame [channel] [j] = my z [channel] [i ++] - localMean [channel];
		}
	}

	/*
	 * Compute the local peak; look half a longest period to both sides.
	 */
	localPeak = 0.0;
	if ((startSample = halfnsamp_window + 1 - halfnsamp_period) < 1) startSample = 1;
	if ((endSample = halfnsamp_window + halfnsamp_period) > nsamp_window) endSample = nsamp_window;
	for (long channel = 1; channel <= my ny; channel ++) {
		for (long j = startSample; j <= endSample; j ++) {
			double value = fabs (frame [channel] [j]);
			if (value > localPeak) localPeak = value;
		}
	}
	// The intensity is clipped at 1.0.
	pitchFrame->intensity = localPeak > globalPeak ? 1.0 : localPeak / globalPeak;

	/*
	 * Compute the correlation into the array 'r'.
	 */
	if (method >= FCC_NORMAL) {
		double startTime = t - 0.5 * (1.0 / minimumPitch + dt_window);
		long localSpan = maximumLag + nsamp_window, localMaximumLag, offset;
		if ((startSample = Sampled_xToLowIndex (me, startTime)) < 1) startSample = 1;
		// Shorten the span (and hence the largest lag) if it would run past the end of the sound.
		if (localSpan > my nx + 1 - startSample) localSpan = my nx + 1 - startSample;
		localMaximumLag = localSpan - nsamp_window;
		offset = startSample - 1;
		double sumx2 = 0;   // sum of squares
		for (long channel = 1; channel <= my ny; channel ++) {
			double *amp = my z [channel] + offset;
			for (long i = 1; i <= nsamp_window; i ++) {
				double x = amp [i] - localMean [channel];
				sumx2 += x * x;
			}
		}
		double sumy2 = sumx2;   // at zero lag, these are still equal
		r [0] = 1.0;
		for (long i = 1; i <= localMaximumLag; i ++) {
			double product = 0.0;
			for (long channel = 1; channel <= my ny; channel ++) {
				double *amp = my z [channel] + offset;
				// Slide the energy of the lagged window by one sample: add the sample that
				// enters (yZ), remove the sample that leaves (y0).
				double y0 = amp [i] - localMean [channel];
				double yZ = amp [i + nsamp_window] - localMean [channel];
				sumy2 += yZ * yZ - y0 * y0;
				for (long j = 1; j <= nsamp_window; j ++) {
					double x = amp [j] - localMean [channel];
					double y = amp [i + j] - localMean [channel];
					product += x * y;
				}
			}
			r [- i] = r [i] = product / sqrt (sumx2 * sumy2);
		}
	} else {

		/*
		 * The FFT of the autocorrelation is the power spectrum.
		 */
		for (long i = 1; i <= nsampFFT; i ++) {
			ac [i] = 0.0;
		}
		// The power spectra of all channels are summed before transforming back.
		for (long channel = 1; channel <= my ny; channel ++) {
			NUMfft_forward (fftTable, frame [channel]);   // complex spectrum
			ac [1] += frame [channel] [1] * frame [channel] [1];   // DC component
			for (long i = 2; i < nsampFFT; i += 2) {
				ac [i] += frame [channel] [i] * frame [channel] [i] + frame [channel] [i+1] * frame [channel] [i+1];   // power spectrum
			}
			ac [nsampFFT] += frame [channel] [nsampFFT] * frame [channel] [nsampFFT];   // Nyquist frequency
		}
		NUMfft_backward (fftTable, ac);   /* Autocorrelation. */

		/*
		 * Normalize the autocorrelation to the value with zero lag,
		 * and divide it by the normalized autocorrelation of the window.
		 */
		r [0] = 1.0;
		for (long i = 1; i <= brent_ixmax; i ++)
			r [- i] = r [i] = ac [i + 1] / (ac [1] * windowR [i + 1]);
	}

	/*
	 * Register the first candidate, which is always present: voicelessness.
	 */
	pitchFrame->nCandidates = 1;
	pitchFrame->candidate[1].frequency = 0.0;   // voiceless: always present
	pitchFrame->candidate[1].strength = 0.0;

	/*
	 * Shortcut: absolute silence is always voiceless.
	 * We are done for this frame.
	 */
	if (localPeak == 0) return;

	/*
	 * Find the strongest maxima of the correlation of this frame,
	 * and register them as candidates.
	 */
	imax [1] = 0;
	for (long i = 2; i < maximumLag && i < brent_ixmax; i ++)
		if (r [i] > 0.5 * voicingThreshold &&   // not too unvoiced?
			r [i] > r [i-1] && r [i] >= r [i+1])   // maximum?
	{
		int place = 0;

		/*
		 * Use parabolic interpolation for first estimate of frequency,
		 * and sin(x)/x interpolation to compute the strength of this frequency.
		 */
		double dr = 0.5 * (r [i+1] - r [i-1]), d2r = 2 * r [i] - r [i-1] - r [i+1];
		double frequencyOfMaximum = 1 / my dx / (i + dr / d2r);
		// & r [offset] is handed over as the base of an array in which lag 0 sits at index - offset.
		long offset = - brent_ixmax - 1;
		double strengthOfMaximum = /* method & 1 ? */
			NUM_interpolate_sinc (& r [offset], brent_ixmax - offset, 1 / my dx / frequencyOfMaximum - offset, 30)
			/* : r [i] + 0.5 * dr * dr / d2r */;
		/* High values due to short windows are to be reflected around 1. */
		if (strengthOfMaximum > 1.0) strengthOfMaximum = 1.0 / strengthOfMaximum;

		/*
		 * Find a place for this maximum.
		 */
		if (pitchFrame->nCandidates < maxnCandidates) {   // is there still a free place?
			place = ++ pitchFrame->nCandidates;
		} else {
			/* Try the place of the weakest candidate so far. */
			// Strengths never exceed 1 here, so 2 is a safe starting value for the minimum search.
			double weakest = 2;
			for (int iweak = 2; iweak <= maxnCandidates; iweak ++) {
				/* High frequencies are to be favoured */
				/* if we want to analyze a perfectly periodic signal correctly. */
				double localStrength = pitchFrame->candidate[iweak].strength - octaveCost *
					NUMlog2 (minimumPitch / pitchFrame->candidate[iweak].frequency);
				if (localStrength < weakest) { weakest = localStrength; place = iweak; }
			}
			/* If this maximum is weaker than the weakest candidate so far, give it no place. */
			if (strengthOfMaximum - octaveCost * NUMlog2 (minimumPitch / frequencyOfMaximum) <= weakest)
				place = 0;
		}
		if (place) {   // have we found a place for this candidate?
			pitchFrame->candidate[place].frequency = frequencyOfMaximum;
			pitchFrame->candidate[place].strength = strengthOfMaximum;
			// Remember the lag of this maximum for the second pass.
			imax [place] = i;
		}
	}

	/*
	 * Second pass: for extra precision, maximize sin(x)/x interpolation ('sinc').
	 */
	for (long i = 2; i <= pitchFrame->nCandidates; i ++) {
		// NOTE(review): 0.0 / my dx is 0, so the frequency test only excludes non-positive frequencies.
		if (method != AC_HANNING || pitchFrame->candidate[i].frequency > 0.0 / my dx) {
			double xmid, ymid;
			long offset = - brent_ixmax - 1;
			// Candidates above 0.3 times the sampling frequency always get the SINC700 depth.
			ymid = NUMimproveMaximum (& r [offset], brent_ixmax - offset, imax [i] - offset,
				pitchFrame->candidate[i].frequency > 0.3 / my dx ? NUM_PEAK_INTERPOLATE_SINC700 : brent_depth, & xmid);
			xmid += offset;
			pitchFrame->candidate[i].frequency = 1.0 / my dx / xmid;
			if (ymid > 1.0) ymid = 1.0 / ymid;
			pitchFrame->candidate[i].strength = ymid;
		}
	}
}
/*
 * Selects the best path through the pitch candidates of all frames and moves the
 * winning candidate of every frame to position 1.
 *
 * Every candidate gets a local score (delta): the unvoiced strength for voiceless
 * candidates, or strength - octaveCost * log2 (ceiling / frequency) for voiced ones.
 * Going from a candidate in one frame to a candidate in the next costs
 * voicedUnvoicedCost for a voicing change and octaveJumpCost * |log2 (f1 / f2)|
 * between two voiced candidates. The path with the highest total is found by
 * dynamic programming (delta holds the best totals, psi the back-pointers).
 *
 * Side effects: my ceiling is set to 'ceiling', and candidates are reordered in place.
 * If pullFormants is nonzero, candidates up to 2 * ceiling count as voiced during the
 * search, and winners between ceiling and 2 * ceiling are afterwards swapped with a
 * voiceless candidate of the same frame.
 * Melder_debug values 30 and 33 enable experimental behaviour and logging, respectively.
 * Any error is rethrown with ": path not found." appended.
 */
void Pitch_pathFinder (Pitch me, double silenceThreshold, double voicingThreshold,
	double octaveCost, double octaveJumpCost, double voicedUnvoicedCost,
	double ceiling, int pullFormants)
{
	if (Melder_debug == 33) Melder_casual (U"Pitch path finder:" U"\nSilence threshold = ", silenceThreshold, U"\nVoicing threshold = ", voicingThreshold, U"\nOctave cost = ", octaveCost, U"\nOctave jump cost = ", octaveJumpCost, U"\nVoiced/unvoiced cost = ", voicedUnvoicedCost, U"\nCeiling = ", ceiling, U"\nPull formants = ", pullFormants);
	try {
		long maxnCandidates = Pitch_getMaxnCandidates (me);
		long place;
		// NOTE(review): 'volatile' presumably forces these through memory so that the
		// comparisons below use plain double precision -- confirm before removing.
		volatile double maximum, value;
		double ceiling2 = pullFormants ? 2 * ceiling : ceiling;
		/* Next three lines 20011015 */
		// The transition costs are scaled by 0.01 s divided by the time step.
		double timeStepCorrection = 0.01 / my dx;
		octaveJumpCost *= timeStepCorrection;
		voicedUnvoicedCost *= timeStepCorrection;

		my ceiling = ceiling;
		autoNUMmatrix <double> delta (1, my nx, 1, maxnCandidates);
		autoNUMmatrix <long> psi (1, my nx, 1, maxnCandidates);

		// Local scores of all candidates.
		for (long iframe = 1; iframe <= my nx; iframe ++) {
			Pitch_Frame frame = & my frame [iframe];
			// The quieter the frame relative to the silence threshold, the stronger its unvoiced candidate.
			double unvoicedStrength = silenceThreshold <= 0 ? 0 :
				2 - frame->intensity / (silenceThreshold / (1 + voicingThreshold));
			unvoicedStrength = voicingThreshold + (unvoicedStrength > 0 ? unvoicedStrength : 0);
			for (long icand = 1; icand <= frame->nCandidates; icand ++) {
				Pitch_Candidate candidate = & frame->candidate [icand];
				// NOTE(review): here a frequency exactly equal to ceiling2 counts as voiced,
				// whereas the transition loop below uses >= ceiling2 -- confirm this is intended.
				int voiceless = candidate->frequency == 0 || candidate->frequency > ceiling2;
				delta [iframe] [icand] = voiceless ? unvoicedStrength :
					candidate->strength - octaveCost * NUMlog2 (ceiling / candidate->frequency);
			}
		}

		/* Look for the most probable path through the maxima. */
		/* There is a cost for the voiced/unvoiced transition, */
		/* and a cost for a frequency jump. */

		for (long iframe = 2; iframe <= my nx; iframe ++) {
			Pitch_Frame prevFrame = & my frame [iframe - 1], curFrame = & my frame [iframe];
			double *prevDelta = delta [iframe - 1], *curDelta = delta [iframe];
			long *curPsi = psi [iframe];
			for (long icand2 = 1; icand2 <= curFrame -> nCandidates; icand2 ++) {
				double f2 = curFrame -> candidate [icand2]. frequency;
				maximum = -1e30;
				place = 0;
				for (long icand1 = 1; icand1 <= prevFrame -> nCandidates; icand1 ++) {
					double f1 = prevFrame -> candidate [icand1]. frequency;
					double transitionCost;
					bool previousVoiceless = f1 <= 0 || f1 >= ceiling2;
					bool currentVoiceless = f2 <= 0 || f2 >= ceiling2;
					if (currentVoiceless) {
						if (previousVoiceless) {
							transitionCost = 0;   // both voiceless
						} else {
							transitionCost = voicedUnvoicedCost;   // voiced-to-unvoiced transition
						}
					} else {
						if (previousVoiceless) {
							transitionCost = voicedUnvoicedCost;   // unvoiced-to-voiced transition
							if (Melder_debug == 30) {
								/*
								 * Try to take into account a frequency jump across a voiceless stretch.
								 */
								// Follow the back-pointers to the most recent voiced frame on the best
								// path so far; the jump cost is divided by the number of frames bridged.
								long place1 = icand1;
								for (long jframe = iframe - 2; jframe >= 1; jframe --) {
									place1 = psi [jframe + 1] [place1];
									f1 = my frame [jframe]. candidate [place1]. frequency;
									if (f1 > 0 && f1 < ceiling) {
										transitionCost += octaveJumpCost * fabs (NUMlog2 (f1 / f2)) / (iframe - jframe);
										break;
									}
								}
							}
						} else {
							transitionCost = octaveJumpCost * fabs (NUMlog2 (f1 / f2));   // both voiced
						}
					}
					value = prevDelta [icand1] - transitionCost + curDelta [icand2];
					//if (Melder_debug == 33) Melder_casual ("Frame %ld, current candidate %ld (delta %g), previous candidate %ld (delta %g), "
					//	"transition cost %g, value %g, maximum %g", iframe, icand2, curDelta [icand2], icand1, prevDelta [icand1], transitionCost, value, maximum);
					// On a tie the earlier predecessor is kept.
					if (value > maximum) {
						maximum = value;
						place = icand1;
					} else if (value == maximum) {
						if (Melder_debug == 33) Melder_casual (
							U"A tie in frame ", iframe,
							U", current candidate ", icand2,
							U", previous candidate ", icand1
						);
					}
				}
				// The local score is replaced by the best total score of any path ending here.
				curDelta [icand2] = maximum;
				curPsi [icand2] = place;
			}
		}

		/* Find the end of the most probable path. */

		place = 1;
		maximum = delta [my nx] [place];
		for (long icand = 2; icand <= my frame [my nx]. nCandidates; icand ++) {
			if (delta [my nx] [icand] > maximum) {
				place = icand;
				maximum = delta [my nx] [place];
			}
		}

		/* Backtracking: follow the path backwards. */

		for (long iframe = my nx; iframe >= 1; iframe --) {
			if (Melder_debug == 33) Melder_casual (
				U"Frame ", iframe,
				U":",
				U" swapping candidates 1 and ", place
			);
			// Swap the candidate on the best path into position 1.
			Pitch_Frame frame = & my frame [iframe];
			structPitch_Candidate help = frame -> candidate [1];
			frame -> candidate [1] = frame -> candidate [place];
			frame -> candidate [place] = help;
			place = psi [iframe] [place];   // This assignment is challenging to CodeWarrior 11.
		}

		/* Pull formants: devoice frames with frequencies between ceiling and ceiling2. */

		if (ceiling2 > ceiling) {
			if (Melder_debug == 33) Melder_casual (U"Pulling formants...");
			for (long iframe = my nx; iframe >= 1; iframe --) {
				Pitch_Frame frame = & my frame [iframe];
				Pitch_Candidate winner = & frame -> candidate [1];
				double f = winner -> frequency;
				if (f > ceiling && f <= ceiling2) {
					// Swap the winner with the first candidate whose frequency is exactly 0;
					// if there is none, the frame is left as it is.
					for (long icand = 2; icand <= frame -> nCandidates; icand ++) {
						Pitch_Candidate loser = & frame -> candidate [icand];
						if (loser -> frequency == 0.0) {
							structPitch_Candidate help = * winner;
							* winner = * loser;
							* loser = help;
							break;
						}
					}
				}
			}
		}
	} catch (MelderError) {
		Melder_throw (me, U": path not found.");
	}
}
/*
 * Performs a pitch analysis of a Sound and returns the resulting Pitch contour.
 *
 * Parameters:
 *   dt                  time step between frames; if <= 0, periodsPerWindow / minimumPitch / 4 is used
 *   minimumPitch        lowest pitch considered; determines the longest period and the window length
 *   periodsPerWindow    window length in periods of minimumPitch (doubled internally for AC_GAUSS)
 *   maxnCandidates      at least 2; raised to ceiling / minimumPitch if smaller than that
 *   method              AC_HANNING, AC_GAUSS (autocorrelation via FFT) or
 *                       FCC_NORMAL, FCC_ACCURATE (forward cross-correlation)
 *   silenceThreshold, voicingThreshold, octaveCost, octaveJumpCost, voicedUnvoicedCost
 *                       candidate selection and path-finder settings
 *   ceiling             highest pitch; clipped to half the sampling frequency
 *
 * Returns NULL, after printing a diagnostic to std::cout, if the arguments are invalid,
 * the sound is too short for the requested minimum pitch, the analysis window is too
 * short, no frame fits, or a frame's sample range falls outside the sound.
 * If the sound is constant (global peak 0), the freshly created Pitch is returned as is.
 *
 * NOTE(review): two leaks remain because the matching disposal routines are not visible
 * from this function: the FFT table is never released, and the partially filled Pitch is
 * not released when an error is detected inside the frame loop.
 */
Pitch Sound_to_Pitch_any (Sound me,
	double dt,                 /* time step strategy related */
	double minimumPitch,       /* pitch settings related */
	double periodsPerWindow,   /* kTimeSoundAnalysisEditor_pitch_analysisMethod related */
	int maxnCandidates,
	int method,                /* method related */
	double silenceThreshold, double voicingThreshold, double octaveCost, double octaveJumpCost,
	double voicedUnvoicedCost, double ceiling)
{
	double duration, t1;
	double dt_window;   /* Window length in seconds. */
	long nsamp_window, halfnsamp_window;   /* Number of samples per window. */
	long nFrames, maximumLag;
	long iframe, nsampFFT = 0;
	double interpolation_depth;
	long nsamp_period, halfnsamp_period;   /* Number of samples in longest period. */
	long brent_ixmax, brent_depth;
	double globalPeak;

	// Reject a method outside AC_HANNING..FCC_ACCURATE. (The two range tests must be
	// combined with ||; with && the test could never fire and an unknown method would
	// leave brent_depth and interpolation_depth uninitialized.)
	if (maxnCandidates < 2 || method < AC_HANNING || method > FCC_ACCURATE) {
		std::cout<<"Error: maxnCandidates: "<<maxnCandidates<<" method: "<<method<<"."<<std::endl;
		std::cout<<"Sound_to_Pitch.cpp: Line 13. 69"<<std::endl;
		return NULL;
	}

	if (maxnCandidates < ceiling / minimumPitch) maxnCandidates = ceiling / minimumPitch;

	if (dt <= 0.0) dt = periodsPerWindow / minimumPitch / 4.0;   /* e.g. 3 periods, 75 Hz: 10 milliseconds. */

	switch (method) {
		case AC_HANNING:
			brent_depth = NUM_PEAK_INTERPOLATE_SINC70;
			interpolation_depth = 0.5;
			break;
		case AC_GAUSS:
			periodsPerWindow *= 2;   /* Because Gaussian window is twice as long. */
			brent_depth = NUM_PEAK_INTERPOLATE_SINC700;
			interpolation_depth = 0.25;   /* Because Gaussian window is twice as long. */
			break;
		case FCC_NORMAL:
			brent_depth = NUM_PEAK_INTERPOLATE_SINC70;
			interpolation_depth = 1.0;
			break;
		case FCC_ACCURATE:
			brent_depth = NUM_PEAK_INTERPOLATE_SINC700;
			interpolation_depth = 1.0;
			break;
	}
	duration = my dx * my nx;
	if (minimumPitch < periodsPerWindow / duration) {
		std::cout<<"To analyse this Sound, minimum pitch must not be less than "<< periodsPerWindow / duration<<" Hz."<<std::endl;
		std::cout<<"Sound_to_Pitch.cpp: Line 31.103"<<std::endl;
		return NULL;
	}

	/*
	 * Determine the number of samples in the longest period.
	 * We need this to compute the local mean of the sound (looking one period in both directions),
	 * and to compute the local peak of the sound (looking half a period in both directions).
	 */
	nsamp_period = floor(1 / my dx / minimumPitch);
	halfnsamp_period = nsamp_period / 2 + 1;

	if (ceiling > 0.5 / my dx) ceiling = 0.5 / my dx;

	// Determine window length in seconds and in samples.
	dt_window = periodsPerWindow / minimumPitch;
	nsamp_window = floor (dt_window / my dx);
	halfnsamp_window = nsamp_window / 2 - 1;
	if (halfnsamp_window < 2){
		std::cout<<"Analysis window too short."<<std::endl;
		std::cout<<"Sound_to_Pitch.cpp: Line 31.123"<<std::endl;
		return NULL;
	}
	nsamp_window = halfnsamp_window * 2;

	// Determine the maximum lag.
	maximumLag = floor (nsamp_window / periodsPerWindow) + 2;
	if (maximumLag > nsamp_window) maximumLag = nsamp_window;

	/*
	 * Determine the number of frames.
	 * Fit as many frames as possible symmetrically in the total duration.
	 * We do this even for the forward cross-correlation method,
	 * because that allows us to compare the two methods.
	 */
	if(!Sampled_shortTermAnalysis (me, method >= FCC_NORMAL ? 1 / minimumPitch + dt_window : dt_window, dt, & nFrames, & t1)){
		std::cout<<"The pitch analysis would give zero pitch frames."<<std::endl;
		std::cout<<"Sound_to_Pitch.cpp: Line 31.142"<<std::endl;
		return NULL;
	}

	// Create the resulting pitch contour.
	Pitch thee = Pitch_create (my xmin, my xmax, nFrames, dt, t1, ceiling, maxnCandidates);

	// Compute the global absolute peak for determination of silence threshold.
	globalPeak = 0.0;
	for (long channel = 1; channel <= my ny; channel ++) {
		double mean = 0.0;
		for (long i = 1; i <= my nx; i ++) {
			mean += my z [channel] [i];
		}
		mean /= my nx;
		for (long i = 1; i <= my nx; i ++) {
			double value = fabs (my z [channel] [i] - mean);
			if (value > globalPeak) globalPeak = value;
		}
	}
	if (globalPeak == 0.0) return thee;

	// The FFT table is created only now, so that none of the early returns above leaks it.
	NUMfft_Table fftTable = NUMfft_Table_create();

	// Scratch buffers; all are indexed from 1, so one extra element is allocated.
	// Those that are not needed by the chosen method stay null, which free() accepts.
	double **frame = nullptr, *ac = nullptr, *window = nullptr, *windowR = nullptr;
	if (method >= FCC_NORMAL) {   /* For cross-correlation analysis. */
		// Create buffer for cross-correlation analysis.
		frame = (double **)malloc(sizeof(double *) * (my ny + 1));
		for(long i = 1; i <= my ny; ++ i){
			frame[i] = (double *)malloc(sizeof(double) * (nsamp_window + 1));
			for(long j = 1; j <= nsamp_window; ++ j)
				frame[i][j] = 0.0;
		}
		brent_ixmax = nsamp_window * interpolation_depth;
	} else {   /* For autocorrelation analysis. */
		/*
		 * Compute the number of samples needed for doing FFT.
		 * To avoid edge effects, we have to append zeroes to the window.
		 * The maximum lag considered for maxima is maximumLag.
		 * The maximum lag used in interpolation is nsamp_window * interpolation_depth.
		 */
		nsampFFT = 1;
		while (nsampFFT < nsamp_window * (1 + interpolation_depth)) nsampFFT *= 2;

		// Create buffers for autocorrelation analysis.
		frame = (double **)malloc(sizeof(double *) * (my ny + 1));
		for(long i = 1; i <= my ny; ++ i){
			frame [i] = (double *)malloc(sizeof(double) * (nsampFFT + 1));
			for(long j = 0; j <= nsampFFT; ++ j)
				frame[i][j] = 0.0;
		}
		window = (double *)malloc(sizeof(double) * (nsamp_window + 1));
		for(long i = 0; i <= nsamp_window; ++ i)
			window[i] = 0.0;
		windowR = (double *)malloc(sizeof(double) * (nsampFFT + 1));
		ac = (double *)malloc(sizeof(double) * (nsampFFT + 1));
		for(long i = 0; i <= nsampFFT; ++ i)
			windowR[i] = ac[i] = 0.0;

		NUMfft_Table_init (fftTable, nsampFFT);

		/*
		 * A Gaussian or Hanning window is applied against phase effects.
		 * The Hanning window is 2 to 5 dB better for 3 periods/window.
		 * The Gaussian window is 25 to 29 dB better for 6 periods/window.
		 */
		if (method == AC_GAUSS) {   /* Gaussian window. */
			double imid = 0.5 * (nsamp_window + 1), edge = exp (-12.0);
			for (long i = 1; i <= nsamp_window; i ++)
				window[i] = (exp(-48.0*(i-imid)*(i-imid) / (nsamp_window + 1) / (nsamp_window + 1)) - edge) / (1 - edge);
		} else {   /* Hanning window. */
			for (long i = 1; i <= nsamp_window; i ++)
				window [i] = 0.5 - 0.5 * cos (i * 2 * NUMpi / (nsamp_window + 1));
		}

		// Compute the normalized autocorrelation of the window.
		for (long i = 1; i <= nsamp_window; i ++) windowR [i] = window [i];
		NUMfft_forward (fftTable, windowR);
		windowR [1] *= windowR [1];   // DC component
		for (long i = 2; i < nsampFFT; i += 2) {
			windowR [i] = windowR [i] * windowR [i] + windowR [i+1] * windowR [i+1];
			windowR [i + 1] = 0.0;   // power spectrum: square and zero
		}
		windowR [nsampFFT] *= windowR [nsampFFT];   // Nyquist frequency
		NUMfft_backward (fftTable, windowR);   // autocorrelation
		for (long i = 2; i <= nsamp_window; i ++) windowR [i] /= windowR [1];   // normalize
		windowR [1] = 1.0;   // normalize

		brent_ixmax = nsamp_window * interpolation_depth;
	}

	// 'r' is addressed symmetrically, from - (nsamp_window + 1) to + (nsamp_window + 1).
	// It must start out as zeroes: in the cross-correlation methods only the lags up to
	// localMaximumLag are written, while the sinc interpolation reads lags up to brent_ixmax.
	double *rBase = (double *) calloc (2 * (nsamp_window + 1) + 1, sizeof (double));
	double *r = rBase + nsamp_window + 1;
	long *imax = (long *) malloc( sizeof(long) * (maxnCandidates + 1));
	double *localMean = (double *) malloc( sizeof(double) * (my ny + 1));

	// Releases every scratch buffer allocated above; called on each exit path from here on.
	auto releaseBuffers = [&] () {
		for (long channel = 1; channel <= my ny; ++ channel)
			free (frame [channel]);
		free (frame);
		free (window);
		free (windowR);
		free (ac);
		free (rBase);
		free (imax);
		free (localMean);
	};

	for(iframe = 1; iframe <= nFrames; iframe ++){
		Pitch_Frame pitchFrame = & thy frame[iframe];
		double t = thy x1 + (iframe - 1) *(thy dx), localPeak;
		// The two samples that straddle time t (1-based).
		long leftSample = (long) floor((t - my x1) / my dx) + 1;
		long rightSample = leftSample + 1;
		long startSample, endSample;

		for(long channel = 1; channel <= my ny; ++ channel){
			// Compute the local mean; look one longest period to both sides.
			startSample = rightSample - nsamp_period;
			endSample = leftSample + nsamp_period;
			// Samples are 1-based, so index 0 is already out of range.
			if ( startSample < 1 ) {
				std::cout<<"StartSample < 1"<<std::endl;
				std::cout<<"Sound_to_Pitch.cpp: Line 31"<<std::endl;
				releaseBuffers ();
				return NULL;
			}
			if (endSample > my nx){
				std::cout<<"EndSample > my nx"<<std::endl;
				std::cout<<"Sound_to_Pitch.cpp: Line 31.262"<<std::endl;
				releaseBuffers ();
				return NULL;
			}

			localMean[channel] = 0.0;
			for (long i = startSample; i <= endSample; i ++) {
				localMean[channel] += my z[channel][i];
			}
			localMean[channel] /= 2 * nsamp_period;

			// Copy a window to a frame and subtract the local mean.
			// We are going to kill the DC component before windowing.
			startSample = rightSample - halfnsamp_window;
			endSample = leftSample + halfnsamp_window;
			if ( startSample < 1 ) {
				std::cout<<"StartSample < 1"<<std::endl;
				std::cout<<"Sound_to_Pitch.cpp: Line 31.281"<<std::endl;
				releaseBuffers ();
				return NULL;
			}
			if (endSample > my nx){
				std::cout<<"EndSample > my nx"<<std::endl;
				std::cout<<"Sound_to_Pitch.cpp: Line 31.287"<<std::endl;
				releaseBuffers ();
				return NULL;
			}

			if (method < FCC_NORMAL) {
				for (long j = 1, i = startSample; j <= nsamp_window; j ++)
					frame [channel] [j] = (my z [channel] [i ++] - localMean [channel]) * window [j];
				for (long j = nsamp_window + 1; j <= nsampFFT; j ++)
					frame [channel] [j] = 0.0;
			} else {
				for (long j = 1, i = startSample; j <= nsamp_window; j ++)
					frame [channel] [j] = my z [channel] [i ++] - localMean [channel];
			}
		}

		// Compute the local peak; look half a longest period to both sides.
		localPeak = 0.0;
		if ((startSample = halfnsamp_window + 1 - halfnsamp_period) < 1) startSample = 1;
		if ((endSample = halfnsamp_window + halfnsamp_period) > nsamp_window) endSample = nsamp_window;
		for (long channel = 1; channel <= my ny; channel ++) {
			for (long j = startSample; j <= endSample; j ++) {
				double value = fabs (frame [channel] [j]);
				if (value > localPeak) localPeak = value;
			}
		}
		pitchFrame->intensity = localPeak > globalPeak ? 1.0 : localPeak / globalPeak;

		// Compute the correlation into the array 'r'.
		if (method >= FCC_NORMAL) {
			double startTime = t - 0.5 * (1.0 / minimumPitch + dt_window);
			long localSpan = maximumLag + nsamp_window, localMaximumLag, offset;
			// Same 1-based low-index computation as for leftSample above, clipped at the first sample.
			startSample = (long) floor ((startTime - my x1) / my dx) + 1;
			if (startSample < 1) startSample = 1;
			if (localSpan > my nx + 1 - startSample) localSpan = my nx + 1 - startSample;
			localMaximumLag = localSpan - nsamp_window;
			offset = startSample - 1;
			double sumx2 = 0;   /* Sum of squares. */
			for (long channel = 1; channel <= my ny; channel ++) {
				double *amp = my z[channel] + offset;
				for (long i = 1; i <= nsamp_window; i ++) {
					double x = amp[i] - localMean[channel];
					sumx2 += x * x;
				}
			}
			double sumy2 = sumx2;   /* At zero lag, these are still equal. */
			r[0] = 1.0;
			for (long i = 1; i <= localMaximumLag; i ++) {
				double product = 0.0;
				for (long channel = 1; channel <= my ny; channel ++) {
					double *amp = my z[channel] + offset;
					double y0 = amp[i] - localMean[channel];
					double yZ = amp[i + nsamp_window] - localMean[channel];
					sumy2 += yZ * yZ - y0 * y0;
					for (long j = 1; j <= nsamp_window; j ++) {
						double x = amp[j] - localMean[channel];
						double y = amp[i + j] - localMean[channel];
						product += x * y;
					}
				}
				r[- i] = r[i] = product / sqrt (sumx2 * sumy2);
			}
		} else {
			// The FFT of the autocorrelation is the power spectrum.
			for (long i = 1; i <= nsampFFT; i ++)
				ac [i] = 0.0;
			for (long channel = 1; channel <= my ny; channel ++) {
				NUMfft_forward (fftTable, frame [channel]);   /* Complex spectrum. */
				ac [1] += frame [channel] [1] * frame [channel] [1];   /* DC component. */
				for (long i = 2; i < nsampFFT; i += 2) {
					ac [i] += frame [channel] [i] * frame [channel] [i] + frame [channel] [i+1] * frame [channel] [i+1];   /* Power spectrum. */
				}
				ac [nsampFFT] += frame [channel] [nsampFFT] * frame [channel] [nsampFFT];   /* Nyquist frequency. */
			}
			NUMfft_backward (fftTable, ac);   /* Autocorrelation. */

			/*
			 * Normalize the autocorrelation to the value with zero lag,
			 * and divide it by the normalized autocorrelation of the window.
			 */
			r [0] = 1.0;
			for (long i = 1; i <= brent_ixmax; i ++)
				r [- i] = r [i] = ac [i + 1] / (ac [1] * windowR [i + 1]);
		}

		// Create (too much) space for candidates.
		Pitch_Frame_init (pitchFrame, maxnCandidates);

		// Register the first candidate, which is always present: voicelessness.
		pitchFrame->nCandidates = 1;
		pitchFrame->candidate[1].frequency = 0.0;   /* Voiceless: always present. */
		pitchFrame->candidate[1].strength = 0.0;

		/*
		 * Shortcut: absolute silence is always voiceless.
		 * Go to next frame.
		 */
		if (localPeak == 0) continue;

		/*
		 * Find the strongest maxima of the correlation of this frame,
		 * and register them as candidates.
		 */
		imax[1] = 0;
		for (long i = 2; i < maximumLag && i < brent_ixmax; i ++)
			if (r[i] > 0.5 * voicingThreshold &&   /* Not too unvoiced? */
				r[i] > r[i-1] && r[i] >= r[i+1])   /* Maximum? */
		{
			int place = 0;

			// Use parabolic interpolation for first estimate of frequency,
			// and sin(x)/x interpolation to compute the strength of this frequency.
			double dr = 0.5 * (r[i+1] - r[i-1]);
			double d2r = 2 * r[i] - r[i-1] - r[i+1];
			double frequencyOfMaximum = 1 / my dx / (i + dr / d2r);
			long offset = - brent_ixmax - 1;
			double strengthOfMaximum = /* method & 1 ? */
				NUM_interpolate_sinc (& r[offset], brent_ixmax - offset, 1 / my dx / frequencyOfMaximum - offset, 30)
				/* : r [i] + 0.5 * dr * dr / d2r */;
			/* High values due to short windows are to be reflected around 1. */
			if (strengthOfMaximum > 1.0) strengthOfMaximum = 1.0 / strengthOfMaximum;

			// Find a place for this maximum.
			if (pitchFrame->nCandidates < thy maxnCandidates) {   /* Is there still a free place? */
				place = ++ pitchFrame->nCandidates;
			} else {
				/* Try the place of the weakest candidate so far. */
				double weakest = 2;
				for (int iweak = 2; iweak <= thy maxnCandidates; iweak ++) {
					/* High frequencies are to be favoured */
					/* if we want to analyze a perfectly periodic signal correctly. */
					double localStrength = pitchFrame->candidate[iweak].strength - octaveCost *
						NUMlog2 (minimumPitch / pitchFrame->candidate[iweak].frequency);
					if (localStrength < weakest) {
						weakest = localStrength;
						place = iweak;
					}
				}
				/* If this maximum is weaker than the weakest candidate so far, give it no place. */
				if (strengthOfMaximum - octaveCost * NUMlog2 (minimumPitch / frequencyOfMaximum) <= weakest)
					place = 0;
			}
			if (place) {   /* Have we found a place for this candidate? */
				pitchFrame->candidate[place].frequency = frequencyOfMaximum;
				pitchFrame->candidate[place].strength = strengthOfMaximum;
				imax [place] = i;
			}
		}

		// Second pass: for extra precision, maximize sin(x)/x interpolation ('sinc').
		for (long i = 2; i <= pitchFrame->nCandidates; i ++) {
			if (method != AC_HANNING || pitchFrame->candidate[i].frequency > 0.0 / my dx) {
				double xmid, ymid;
				long offset = - brent_ixmax - 1;
				ymid = NUMimproveMaximum (& r[offset], brent_ixmax - offset, imax[i] - offset,
					pitchFrame->candidate[i].frequency > 0.3 / my dx ? NUM_PEAK_INTERPOLATE_SINC700 : brent_depth, & xmid);
				xmid += offset;
				pitchFrame->candidate[i].frequency = 1.0 / my dx / xmid;
				if (ymid > 1.0) ymid = 1.0 / ymid;
				pitchFrame->candidate[i].strength = ymid;
			}
		}
	}   /* Next frame. */

	// The scratch buffers are no longer needed; release them before the path finder runs,
	// so that they are not lost if it fails.
	releaseBuffers ();

	// Formant pulling is switched off here (upstream: Melder_debug == 31 switches it on).
	Pitch_pathFinder (thee, silenceThreshold, voicingThreshold, octaveCost, octaveJumpCost, voicedUnvoicedCost, ceiling, false);

	return thee;
}
/*
	Derive a Pitch from a SPINET by weighted harmonic summation.

	For every frame (column) of the SPINET, the values of all filter channels
	are interpolated with a cubic spline onto a log2-frequency grid. Copies of
	that interpolated spectrum, shifted down by log2 (m) octaves and weighted
	by 1 - harmonicFallOffSlope * log2 (m), are summed for m = 1 .. fmax / fmin.
	Every local maximum of the sum is refined by parabolic interpolation and
	passed to Pitch_Frame_addPitch; a candidate with frequency 0 and strength 0
	is always added first. Finally the strengths of each frame are rescaled
	relative to the largest strength found in any frame.

	Arguments:
		harmonicFallOffSlope: slope of the per-harmonic weight (see above).
		ceiling: passed to Pitch_create; must be above the centre frequency
			of the lowest filter.
		maxnCandidates: passed to Pitch_create, Pitch_Frame_init and
			Pitch_Frame_addPitch.

	Returns the new Pitch, or the result of Melder_errorp1 if the frequency
	range is too small, the ceiling is too low, or a Melder error is pending at
	cleanup. If the summed power is zero in all frames, the Pitch is returned
	right after its creation, without any frame having been analysed.
*/
Pitch SPINET_to_Pitch (SPINET me, double harmonicFallOffSlope, double ceiling, int maxnCandidates) {
	const long nPointsPerOctave = 48;
	const double unvoicedCriterium = 0.45;

	/* Frequency range of the filter bank, in hertz and on a log2 scale. */
	const double fmin = NUMerbToHertz (Sampled2_rowToY (me, 1));
	const double fmax = NUMerbToHertz (Sampled2_rowToY (me, my ny));
	const double fminl2 = NUMlog2 (fmin);
	const double fmaxl2 = NUMlog2 (fmax);

	/* The (fractional) number of grid points fixes the grid step; the grid itself uses the truncated count. */
	const double points = (fmaxl2 - fminl2) * nPointsPerOctave;
	const double dfl2 = (fmaxl2 - fminl2) / (points - 1);
	const long nFrequencyPoints = (long) points;
	const long maxHarmonic = (long) (fmax / fmin);

	/* Everything that 'cleanup' touches is declared before the first goto. */
	Pitch thee = NULL;
	double *power = NULL, *pitch = NULL, *sumspec = NULL;
	double *y = NULL, *y2 = NULL, *fl2 = NULL;
	double maxPower = 0, maxStrength = 0;

	if (nFrequencyPoints < 2)
		return Melder_errorp1 (L"SPINET_to_Pitch: frequency range too small.");
	if (ceiling <= fmin)
		return Melder_errorp1 (L"SPINET_to_Pitch: ceiling is smaller than centre " "frequency of lowest filter.");

	/* Allocate in a fixed order; stop at the first failure. */
	thee = Pitch_create (my xmin, my xmax, my nx, my dx, my x1, ceiling, maxnCandidates);
	if (! thee) goto cleanup;
	power = NUMdvector (1, my nx);
	if (! power) goto cleanup;
	pitch = NUMdvector (1, nFrequencyPoints);
	if (! pitch) goto cleanup;
	sumspec = NUMdvector (1, nFrequencyPoints);
	if (! sumspec) goto cleanup;
	y = NUMdvector (1, my ny);
	if (! y) goto cleanup;
	y2 = NUMdvector (1, my ny);
	if (! y2) goto cleanup;
	fl2 = NUMdvector (1, my ny);
	if (! fl2) goto cleanup;

	/* Centre frequency of each filter: from ERB to log2 (f). */
	for (long ifilter = 1; ifilter <= my ny; ifilter ++) {
		const double hertz = NUMerbToHertz (my y1 + (ifilter - 1) * my dy);
		fl2 [ifilter] = NUMlog2 (hertz);
	}

	/* Summed power per frame, and the largest of those sums. */
	for (long iframe = 1; iframe <= my nx; iframe ++) {
		double framePower = 0;
		for (long ifilter = 1; ifilter <= my ny; ifilter ++)
			framePower += my s [ifilter] [iframe];
		if (framePower > maxPower) maxPower = framePower;
		power [iframe] = framePower;
	}
	if (maxPower == 0) goto cleanup;

	for (long iframe = 1; iframe <= my nx; iframe ++) {
		Pitch_Frame pitchFrame = & thy frame [iframe];
		pitchFrame -> intensity = power [iframe] / maxPower;

		/* Spline-interpolate this frame's channel values onto the log2 grid. */
		for (long ifilter = 1; ifilter <= my ny; ifilter ++)
			y [ifilter] = my s [ifilter] [iframe];
		if (! NUMspline (fl2, y, my ny, 1e30, 1e30, y2)) goto cleanup;
		for (long ipoint = 1; ipoint <= nFrequencyPoints; ipoint ++) {
			const double gridFrequency = fminl2 + (ipoint - 1) * dfl2;
			NUMsplint (fl2, y, y2, my ny, gridFrequency, & pitch [ipoint]);
			sumspec [ipoint] = 0;
		}

		/* Formula (8): weighted harmonic summation. Only positive values contribute. */
		for (long harmonic = 1; harmonic <= maxHarmonic; harmonic ++) {
			const double octaves = NUMlog2 (harmonic);
			const double weight = 1 - harmonicFallOffSlope * octaves;
			const long firstSource = 1 + floor (nPointsPerOctave * octaves);
			long target = 1;
			for (long source = firstSource; source <= nFrequencyPoints; source ++, target ++) {
				if (pitch [source] > 0) sumspec [target] += pitch [source] * weight;
			}
		}

		/* Store the candidates in the Pitch frame, starting with the unvoiced one. */
		if (! Pitch_Frame_init (pitchFrame, maxnCandidates)) goto cleanup;
		pitchFrame -> nCandidates = 0;   /* !!!!! */
		Pitch_Frame_addPitch (pitchFrame, 0, 0, maxnCandidates);

		/* Each local maximum of the summed spectrum becomes a candidate, refined by a parabola through three points. */
		for (long ipoint = 2; ipoint <= nFrequencyPoints - 1; ipoint ++) {
			const double left = sumspec [ipoint - 1], mid = sumspec [ipoint], right = sumspec [ipoint + 1];
			if (! (mid > left && mid >= right)) continue;
			const double denum = left - 2 * mid + right, tmp = right - 4 * mid;
			const double x = dfl2 * (left - right) / (2 * denum);
			const double f = pow (2, fminl2 + (ipoint - 1) * dfl2 + x);
			const double strength = (2 * left * (4 * mid + right) - left * left - tmp * tmp) / (8 * denum);
			if (strength > maxStrength) maxStrength = strength;
			Pitch_Frame_addPitch (pitchFrame, f, strength, maxnCandidates);
		}
	}

	/* Scale the pitch strengths relative to the global maximum. */
	for (long iframe = 1; iframe <= my nx; iframe ++) {
		double f0, localStrength;
		Pitch_Frame_getPitch (& thy frame [iframe], & f0, & localStrength);
		Pitch_Frame_resizeStrengths (& thy frame [iframe], localStrength / maxStrength, unvoicedCriterium);
	}

cleanup:
	NUMdvector_free (pitch, 1);
	NUMdvector_free (sumspec, 1);
	NUMdvector_free (y, 1);
	NUMdvector_free (y2, 1);
	NUMdvector_free (fl2, 1);
	NUMdvector_free (power, 1);
	if (! Melder_hasError()) return thee;
	forget (thee);
	return Melder_errorp1 (L"SPINET_to_Pitch: not performed.");
}
/*
	Pitch analysis by subharmonic summation (SHS).

	The sound is resampled to 2 * maximumFrequency. For each analysis frame
	(window duration 2 / minimumPitch) the Hamming-windowed amplitude spectrum
	is peak-enhanced, smoothed, interpolated by a cubic spline onto a log2
	frequency grid, and multiplied by an arctangent weighting function.
	Copies of this spectrum, shifted down by log2 (m) octaves and weighted by
	compressionFactor ^ (m - 1), are summed for m = 1 .. maxnSubharmonics + 1.
	Local maxima of the sum, refined by parabolic interpolation, become the
	pitch candidates of the frame; a candidate with frequency 0 and strength 0
	is always registered first. Afterwards the strengths of each frame are
	resized with the correlation between two consecutive periods of the best
	candidate (0 where the best candidate has frequency 0).

	Arguments:
		timeStep: passed to Sampled_shortTermAnalysis and Pitch_create.
		minimumPitch: lower edge of the log2 grid; must be > 0.
		maximumFrequency: upper edge of the log2 grid; must be > minimumPitch.
		ceiling, maxnCandidates: passed to Pitch_create.
		maxnSubharmonics: number of shifted spectra added to the unshifted one.
		compressionFactor: weight ratio between successive subharmonics.
		nPointsPerOctave: resolution of the log2 grid; must be >= 1.

	Returns the new Pitch. Throws MelderError (with ": no Pitch (shs) created."
	appended) if the arguments do not yield at least two grid points or if any
	step of the analysis throws.
*/
Pitch Sound_to_Pitch_shs (Sound me, double timeStep, double minimumPitch, double maximumFrequency, double ceiling, long maxnSubharmonics, long maxnCandidates, double compressionFactor, long nPointsPerOctave) {
	try {
		/*
			Validate before any arithmetic: a non-positive minimumPitch makes the
			window duration and the log2 values meaningless, and a range that does
			not give at least two grid points makes dfl2 divide by zero (or go
			negative) and the work vectors empty.
			The negated comparisons also reject NaN.
		*/
		if (! (minimumPitch > 0.0)) {
			Melder_throw (U"The minimum pitch should be greater than zero.");
		}
		if (! (maximumFrequency > minimumPitch)) {
			Melder_throw (U"The maximum frequency should be greater than the minimum pitch.");
		}
		if (nPointsPerOctave < 1) {
			Melder_throw (U"The number of points per octave should be at least 1.");
		}

		double firstTime, newSamplingFrequency = 2 * maximumFrequency;
		double windowDuration = 2 / minimumPitch, halfWindow = windowDuration / 2;
		double atans = nPointsPerOctave * NUMlog2 (65.0 / 50.0) - 1;

		// Number of speech samples in the downsampled signal in each frame:
		// 100 for windowDuration == 0.04 and newSamplingFrequency == 2500
		long nx = lround (windowDuration * newSamplingFrequency);

		// The minimum number of points for the fft is 256.
		long nfft = 1;
		while ( (nfft *= 2) < nx || nfft <= 128) {
			;
		}
		long nfft2 = nfft / 2 + 1;
		double frameDuration = nfft / newSamplingFrequency;
		double df = newSamplingFrequency / nfft;

		// The number of points on the octave scale
		double fminl2 = NUMlog2 (minimumPitch), fmaxl2 = NUMlog2 (maximumFrequency);
		long nFrequencyPoints = (long) floor ((fmaxl2 - fminl2) * nPointsPerOctave);
		if (nFrequencyPoints < 2) {
			Melder_throw (U"The frequency range is too small.");
		}
		double dfl2 = (fmaxl2 - fminl2) / (nFrequencyPoints - 1);

		autoSound sound = Sound_resample (me, newSamplingFrequency, 50);
		long numberOfFrames;
		Sampled_shortTermAnalysis (sound.peek(), windowDuration, timeStep, &numberOfFrames, &firstTime);
		autoSound frame = Sound_createSimple (1, frameDuration, newSamplingFrequency);
		autoSound hamming = Sound_createHamming (nx / newSamplingFrequency, newSamplingFrequency);
		autoPitch thee = Pitch_create (my xmin, my xmax, numberOfFrames, timeStep, firstTime, ceiling, maxnCandidates);
		autoNUMvector<double> cc (1, numberOfFrames);
		autoNUMvector<double> specAmp (1, nfft2);
		autoNUMvector<double> fl2 (1, nfft2);
		autoNUMvector<double> yv2 (1, nfft2);
		autoNUMvector<double> arctg (1, nFrequencyPoints);
		autoNUMvector<double> al2 (1, nFrequencyPoints);

		Melder_assert (frame->nx >= nx);
		Melder_assert (hamming->nx == nx);

		// Compute the absolute value of the globally largest amplitude w.r.t. the global mean.
		double globalMean, globalPeak;
		Sound_localMean (sound.peek(), sound -> xmin, sound -> xmax, &globalMean);
		Sound_localPeak (sound.peek(), sound -> xmin, sound -> xmax, globalMean, &globalPeak);

		/*
			For the cubic spline interpolation we need the frequencies on an octave
			scale, i.e., a log2 scale. All frequencies must be DIFFERENT, otherwise
			the cubic spline interpolation will give corrupt results.
			Because log2(f==0) is not defined, we use the heuristic: f[2]-f[1] == f[3]-f[2].
		*/
		for (long i = 2; i <= nfft2; i++) {
			fl2[i] = NUMlog2 ( (i - 1) * df);
		}
		fl2[1] = 2 * fl2[2] - fl2[3];

		// Calculate frequencies regularly spaced on a log2-scale and
		// the frequency weighting function.
		for (long i = 1; i <= nFrequencyPoints; i++) {
			arctg[i] = 0.5 + atan (3 * (i - atans) / nPointsPerOctave) / NUMpi;
		}

		// Perform the analysis on all frames.
		for (long i = 1; i <= numberOfFrames; i++) {
			Pitch_Frame pitchFrame = &thy frame[i];
			double tmid = Sampled_indexToX (thee.peek(), i); /* The center of this frame */

			// Copy a frame from the sound, apply a hamming window. Get local 'intensity'.
			// Dirty hack: the frame is temporarily shortened to nx samples so that only
			// the windowed part is copied and multiplied; its real length is restored below.
			double localMean, localPeak;
			long nx_tmp = frame -> nx;
			frame -> nx = nx; /* begin dirty hack */
			Sound_into_Sound (sound.peek(), frame.peek(), tmid - halfWindow);
			Sounds_multiply (frame.peek(), hamming.peek());
			Sound_localMean (sound.peek(), tmid - 3 * halfWindow, tmid + 3 * halfWindow, &localMean);
			Sound_localPeak (sound.peek(), tmid - halfWindow, tmid + halfWindow, localMean, &localPeak);
			pitchFrame -> intensity = localPeak > globalPeak ? 1 : localPeak / globalPeak;
			frame -> nx = nx_tmp; /* end dirty hack */

			// Get the Fourier spectrum.
			autoSpectrum spec = Sound_to_Spectrum (frame.peek(), 1);
			Melder_assert (spec->nx == nfft2);

			// From complex spectrum to amplitude spectrum.
			for (long j = 1; j <= nfft2; j++) {
				double rs = spec -> z[1][j], is = spec -> z[2][j];
				specAmp[j] = sqrt (rs * rs + is * is);
			}

			// Enhance the peaks in the spectrum.
			spec_enhance_SHS (specAmp.peek(), nfft2);

			// Smooth the enhanced spectrum.
			spec_smoooth_SHS (specAmp.peek(), nfft2);

			// Go to a logarithmic scale and perform cubic spline interpolation to get
			// spectral values for the increased number of frequency points.
			NUMspline (fl2.peek(), specAmp.peek(), nfft2, 1e30, 1e30, yv2.peek());
			for (long j = 1; j <= nFrequencyPoints; j++) {
				double f = fminl2 + (j - 1) * dfl2;
				NUMsplint (fl2.peek(), specAmp.peek(), yv2.peek(), nfft2, f, &al2[j]);
			}

			// Multiply by frequency selectivity of the auditory system.
			// Non-positive interpolated values are clipped to zero.
			for (long j = 1; j <= nFrequencyPoints; j++) {
				al2[j] = al2[j] > 0 ? al2[j] * arctg[j] : 0;
			}

			// The subharmonic summation. Shift spectra in octaves and sum.
			// The weight hm starts at 1 and is multiplied by compressionFactor per subharmonic.
			Pitch_Frame_init (pitchFrame, maxnCandidates);
			autoNUMvector<double> sumspec (1, nFrequencyPoints);
			pitchFrame -> nCandidates = 0; /* !!!!! */
			double hm = 1;
			for (long m = 1; m <= maxnSubharmonics + 1; m++) {
				long kb = 1 + (long) floor (nPointsPerOctave * NUMlog2 (m));
				for (long k = kb; k <= nFrequencyPoints; k++) {
					sumspec[k - kb + 1] += al2[k] * hm;
				}
				hm *= compressionFactor;
			}

			// First register the voiceless candidate (always present).
			Pitch_Frame_addPitch (pitchFrame, 0, 0, maxnCandidates);

			/*
				Get the best local estimates for the pitch as the maxima of the
				subharmonic sum spectrum by parabolic interpolation on three points:
				The formula for a parabola with a maximum is:
					y(x) = a - b (x - c)^2 with a, b, c >= 0
				The three points are (-x, y1), (0, y2) and (x, y3).
				The solution for a (the maximum) and c (the position) is:
					a = (2 y1 (4 y2 + y3) - y1^2 - (y3 - 4 y2)^2) / (8 (y1 - 2 y2 + y3))
					c = dx (y1 - y3) / (2 (y1 - 2 y2 + y3))
				(b = (2 y2 - y1 - y3) / (2 dx^2) )
			*/
			for (long k = 2; k <= nFrequencyPoints - 1; k++) {
				double y1 = sumspec[k - 1], y2 = sumspec[k], y3 = sumspec[k + 1];
				if (y2 > y1 && y2 >= y3) {
					double denum = y1 - 2 * y2 + y3, tmp = y3 - 4 * y2;
					double x = dfl2 * (y1 - y3) / (2 * denum);
					double f = pow (2, fminl2 + (k - 1) * dfl2 + x);
					double strength = (2 * y1 * (4 * y2 + y3) - y1 * y1 - tmp * tmp) / (8 * denum);
					Pitch_Frame_addPitch (pitchFrame, f, strength, maxnCandidates);
				}
			}

			/*
				Check whether f0 corresponds to an actual periodicity T = 1 / f0:
				correlate two signal periods of duration T, one starting at the
				middle of the interval and one starting T seconds before.
				If there is periodicity the correlation coefficient should be high.

				However, some sounds do not show any regularity, or very low
				frequency and regularity, and nevertheless have a definite
				pitch, e.g. Shepard sounds.
			*/
			double f0, pitch_strength;
			Pitch_Frame_getPitch (pitchFrame, &f0, &pitch_strength);
			if (f0 > 0) {
				cc[i] = Sound_correlateParts (sound.peek(), tmid - 1.0 / f0, tmid, 1.0 / f0);
			}
		}

		// Base V/UV decision on correlation coefficients.
		// Resize the pitch strengths w.r.t. the cc.
		double vuvCriterium = 0.52;
		for (long i = 1; i <= numberOfFrames; i++) {
			Pitch_Frame_resizeStrengths (& thy frame[i], cc[i], vuvCriterium);
		}
		return thee.transfer();
	} catch (MelderError) {
		Melder_throw (me, U": no Pitch (shs) created.");
	}
}