static void Sound_PointProcess_fillVoiceless (Sound me, PointProcess pulses) { long ipointleft, ipointright; double beginVoiceless = my xmin, endVoiceless; for (ipointleft = 1; ipointleft <= pulses -> nt; ipointleft = ipointright + 1) { long i1, i2, i; endVoiceless = pulses -> t [ipointleft] - 0.005; i1 = Sampled_xToHighIndex (me, beginVoiceless); if (i1 < 1) i1 = 1; if (i1 > my nx) i1 = my nx; i2 = Sampled_xToLowIndex (me, endVoiceless); if (i2 < 1) i2 = 1; if (i2 > my nx) i2 = my nx; if (i2 - i1 > 10) for (i = i1; i <= i2; i ++) my z [1] [i] = NUMrandomGauss (0.0, 0.3); for (ipointright = ipointleft + 1; ipointright <= pulses -> nt; ipointright ++) if (pulses -> t [ipointright] - pulses -> t [ipointright - 1] > MAX_T) break; ipointright --; beginVoiceless = pulses -> t [ipointright] + 0.005; } endVoiceless = my xmax; { long i1, i2, i; i1 = Sampled_xToHighIndex (me, beginVoiceless); if (i1 < 1) i1 = 1; if (i1 > my nx) i1 = my nx; i2 = Sampled_xToLowIndex (me, endVoiceless); if (i2 < 1) i2 = 1; if (i2 > my nx) i2 = my nx; if (i2 - i1 > 10) for (i = i1; i <= i2; i ++) my z [1] [i] = NUMrandomGauss (0.0, 0.3); } }
/*
	Plays the part of the manipulation between `tmin` and `tmax`.

	For Manipulation_OVERLAPADD, a copy of the sound is made in which channel 1 is
	silenced outside [tmin, tmax]; this copy temporarily replaces `my sound` while
	Manipulation_to_Sound runs, after which the original sound is put back
	(also when resynthesis throws). The non-zero stretch of the result is then played.
	For any other method, the whole manipulation is resynthesized and
	Sound_playPart is asked to play [tmin, tmax].

	Returns 1 on success; throws (with ": not played." appended) on failure.
*/
int Manipulation_playPart (Manipulation me, double tmin, double tmax, int method) {
	try {
		if (method == Manipulation_OVERLAPADD) {
			if (! my sound)
				Melder_throw (U"Cannot synthesize overlap-add without a sound.");
			autoSound part = Data_copy (my sound.get());
			long imin = Sampled_xToLowIndex (part.get(), tmin), imax = Sampled_xToHighIndex (part.get(), tmax);
			/*
				Keep the two zeroing loops inside amp [1..nx]:
				a `tmin` beyond the end of the sound would otherwise write past amp [nx],
				and a `tmax` before the start of the sound would write at amp [0] or below.
			*/
			if (imin > part -> nx)
				imin = part -> nx;
			if (imax < 1)
				imax = 1;
			double *amp = part -> z [1];
			for (long i = 1; i <= imin; i ++)
				amp [i] = 0.0;
			for (long i = imax; i <= part -> nx; i ++)
				amp [i] = 0.0;
			/*
				Temporarily swap the silenced copy in for the real sound.
			*/
			autoSound saved = my sound.move();
			my sound = part.move();
			try {
				autoSound played = Manipulation_to_Sound (me, Manipulation_OVERLAPADD);
				my sound = saved.move();
				/*
					Find the first and last non-zero samples of the result.
				*/
				amp = played -> z [1];
				for (imin = 1; imin <= played -> nx; imin ++)
					if (amp [imin] != 0.0)
						break;
				for (imax = played -> nx; imax >= 1; imax --)
					if (amp [imax] != 0.0)
						break;
				Sound_playPart (played.get(),
					played -> x1 + (imin - 1.5) * played -> dx,
					played -> x1 + (imax - 0.5) * played -> dx,
					nullptr, nullptr);
			} catch (MelderError) {
				my sound = saved.move();   // restore the original sound before propagating
				throw;
			}
		} else {
			autoSound sound = Manipulation_to_Sound (me, method);
			Sound_playPart (sound.get(), tmin, tmax, nullptr, nullptr);
		}
		return 1;
	} catch (MelderError) {
		Melder_throw (me, U": not played.");
	}
}
/*
	Returns the time of the zero crossing in `channel` that is nearest to `position`.

	A zero crossing is a pair of adjacent samples of which exactly one is >= 0.0;
	its time is computed by interpolate (). If the two samples around `position`
	already form such a pair, that crossing is returned. Otherwise the nearest
	crossing to the left and the nearest crossing to the right are looked up and
	the closer one is returned (the right one on a tie).
	Returns NUMundefined if `position` lies beyond the samples or if there is
	no crossing on either side.
*/
double Sound_getNearestZeroCrossing (Sound me, double position, long channel) {
	double *samples = my z [channel];
	long lowSample = Sampled_xToLowIndex (me, position);
	long highSample = lowSample + 1;
	long isamp;
	int foundLeft = 0, foundRight = 0;
	double zeroOnLeft = 0.0, zeroOnRight = 0.0;

	/* Does `position` itself lie between two samples of different sign? */
	if (lowSample >= 1 && highSample <= my nx) {
		if ((samples [lowSample] >= 0.0) != (samples [highSample] >= 0.0))
			return interpolate (me, lowSample, channel);
	}

	/* Leftward search. */
	if (lowSample > my nx)
		return NUMundefined;
	for (isamp = lowSample - 1; isamp >= 1; isamp --) {
		if ((samples [isamp] >= 0.0) != (samples [isamp + 1] >= 0.0)) {
			zeroOnLeft = interpolate (me, isamp, channel);
			foundLeft = 1;
			break;
		}
	}

	/* Rightward search. */
	if (highSample < 1)
		return NUMundefined;
	for (isamp = highSample + 1; isamp <= my nx; isamp ++) {
		if ((samples [isamp] >= 0.0) != (samples [isamp - 1] >= 0.0)) {
			zeroOnRight = interpolate (me, isamp - 1, channel);
			foundRight = 1;
			break;
		}
	}

	/* Pick the result. */
	if (! foundLeft && ! foundRight)
		return NUMundefined;
	if (! foundLeft)
		return zeroOnRight;
	if (! foundRight)
		return zeroOnLeft;
	return position - zeroOnLeft < zeroOnRight - position ? zeroOnLeft : zeroOnRight;
}
/*
	Converts the part of `channel` between `tmin` and `tmax` into a closed Polygon.

	The polygon consists of, in order:
	(tmin, level), (tmin, interpolated value at tmin), one point per sample inside
	the window, (tmax, interpolated value at tmax), and (tmax, level).
	All y values pass through CLIP_Y with `ymin` and `ymax`.
	If tmin >= tmax, the whole time domain of the sound is used.
	Throws if the channel does not exist or the time window is invalid.
*/
Polygon Sound_to_Polygon (Sound me, int channel, double tmin, double tmax, double ymin, double ymax, double level) {
	try {
		// NOTE(review): `clip` is not read anywhere else in this function;
		// presumably the CLIP_Y macro refers to it by name, so it must keep this name.
		bool clip = ymin < ymax;
		if (channel < 1 || channel > my ny) {
			Melder_throw ("Channel does not exist.");
		}
		if (tmin >= tmax) {
			tmin = my xmin;
			tmax = my xmax;
		}
		if (tmin < my xmin) {
			tmin = my xmin;
		}
		if (tmax > my xmax) {
			tmax = my xmax;
		}
		if (tmin >= my xmax || tmax < my xmin) {
			Melder_throw ("Invalid domain.");
		}
		const long firstSample = Sampled_xToHighIndex (me, tmin);
		const long lastSample = Sampled_xToLowIndex (me, tmax);
		// the samples, plus two edge points and two level points
		const long numberOfPoints = lastSample - firstSample + 1 + 2 + 2;
		autoPolygon him = Polygon_create (numberOfPoints);

		/*
			Vector_getValueAtX only returns defined values between the edges
				left = x1 - 0.5 * dx   and   right = left + nx * dx.
			Because of floating-point rounding, `left` computed this way may differ
			by a tiny amount from xmin (and `right` from xmax), in which case a query
			exactly at xmin or xmax falls outside the interpolation domain and yields
			'undefined'. Work around this by pulling tmin and tmax inside the edges.
		*/
		const double leftEdge = my x1 - 0.5 * my dx;
		const double rightEdge = leftEdge + my nx * my dx;
		if (tmin < leftEdge) {
			tmin = leftEdge;
		}
		if (tmax > rightEdge) {
			tmax = rightEdge;
		}

		long ipoint = 0;
		double y;

		// start on the level line
		ipoint ++;
		his x [ipoint] = tmin;
		his y [ipoint] = CLIP_Y (level, ymin, ymax);

		// left edge of the window
		ipoint ++;
		his x [ipoint] = tmin;
		y = Vector_getValueAtX (me, tmin, channel, Vector_VALUE_INTERPOLATION_LINEAR);
		his y [ipoint] = CLIP_Y (y, ymin, ymax);

		// the samples themselves
		for (long isamp = firstSample; isamp <= lastSample; isamp ++) {
			ipoint ++;
			y = my z [channel] [isamp];
			his x [ipoint] = my x1 + (isamp - 1) * my dx;
			his y [ipoint] = CLIP_Y (y, ymin, ymax);
		}

		// right edge of the window
		ipoint ++;
		his x [ipoint] = tmax;
		y = Vector_getValueAtX (me, tmax, channel, Vector_VALUE_INTERPOLATION_LINEAR);
		his y [ipoint] = CLIP_Y (y, ymin, ymax);

		// back on the level line
		ipoint ++;
		his x [ipoint] = tmax;
		his y [ipoint] = CLIP_Y (level, ymin, ymax);

		return him.transfer();
	} catch (MelderError) {
		Melder_throw (me, ":no Polygon created.");
	}
}
/*
	Returns a copy of `me` in which the real and imaginary parts are set to zero
	for all bins up to and including the bin at or just below `fmin`, and for all
	bins from the bin at or just above `fmax` upward.
	The caller owns the returned Spectrum.
*/
static Spectrum Spectrum_band (Spectrum me, double fmin, double fmax) {
	autoSpectrum band = Data_copy (me);
	double *re = band -> z [1], *im = band -> z [2];
	long imin = Sampled_xToLowIndex (band.peek(), fmin), imax = Sampled_xToHighIndex (band.peek(), fmax);
	/*
		Keep both zeroing loops inside [1, nx]:
		an `fmin` above the highest bin would otherwise write past re [nx] / im [nx],
		and an `fmax` below the lowest bin would write at index 0 or below.
	*/
	if (imin > band -> nx)
		imin = band -> nx;
	if (imax < 1)
		imax = 1;
	for (long i = 1; i <= imin; i ++)
		re [i] = 0.0, im [i] = 0.0;
	for (long i = imax; i <= band -> nx; i ++)
		re [i] = 0.0, im [i] = 0.0;
	return band.transfer();
}
/*
	Menu callback: inserts the contents of Sound_clipboard into the edited sound,
	directly after the sample at or just below the end of the selection.

	Returns 1 when done (also when the clipboard is empty, after a warning),
	the result of Melder_error1 when the clipboard does not match the sound
	in number of channels or in sampling period, and 0 when the new sample
	matrix cannot be allocated.
*/
int SoundEditor::menu_cb_Paste (EDITOR_ARGS) {
	SoundEditor *editor = (SoundEditor *)editor_me;
	Sound sound = (Sound) editor->_data;
	long insertAfter = Sampled_xToLowIndex (sound, editor->_endSelection);
	const long oldLength = sound -> nx;
	double **oldSamples = sound -> z;

	if (! Sound_clipboard) {
		Melder_warning1 (L"(SoundEditor_paste:) Clipboard is empty; nothing pasted.");
		return 1;
	}
	if (Sound_clipboard -> ny != sound -> ny)
		return Melder_error1 (L"(SoundEditor_paste:) Cannot paste because\n"
			"the number of channels of the clipboard is not equal to\n"
			"the number of channels of the edited sound.");
	if (Sound_clipboard -> dx != sound -> dx)
		return Melder_error1 (L"(SoundEditor_paste:) Cannot paste because\n"
			"the sampling frequency of the clipboard is not equal to\n"
			"the sampling frequency of the edited sound.");

	/* The insertion point may range from 0 (before everything) to the old length (after everything). */
	if (insertAfter < 0) insertAfter = 0;
	if (insertAfter > oldLength) insertAfter = oldLength;

	const long pastedLength = Sound_clipboard -> nx;
	const long newLength = oldLength + pastedLength;
	double **newSamples = NUMdmatrix (1, sound -> ny, 1, newLength);
	if (! newSamples) return 0;

	/* New contents = old head + clipboard + old tail. */
	for (long channel = 1; channel <= sound -> ny; channel ++) {
		double *destination = newSamples [channel];
		for (long i = 1; i <= insertAfter; i ++) {
			destination [i] = oldSamples [channel] [i];
		}
		for (long i = 1; i <= pastedLength; i ++) {
			destination [insertAfter + i] = Sound_clipboard -> z [channel] [i];
		}
		for (long i = insertAfter + 1; i <= oldLength; i ++) {
			destination [pastedLength + i] = oldSamples [channel] [i];
		}
	}

	editor->save (L"Paste");

	/* Replace the sample matrix and make the sound start at time zero. */
	NUMdmatrix_free (oldSamples, 1, 1);
	sound -> xmin = 0.0;
	sound -> xmax = newLength * sound -> dx;
	sound -> nx = newLength;
	sound -> x1 = 0.5 * sound -> dx;
	sound -> z = newSamples;

	/* Start updating the markers of the FunctionEditor, respecting the invariants. */
	editor->_tmin = sound -> xmin;
	editor->_tmax = sound -> xmax;
	editor->_startSelection = insertAfter * sound -> dx;
	editor->_endSelection = (insertAfter + Sound_clipboard -> nx) * sound -> dx;

	/* Force FunctionEditor to show changes. */
	Matrix_getWindowExtrema (sound, 1, sound -> nx, 1, sound -> ny,
		& editor->_sound.minimum, & editor->_sound.maximum);
	editor->destroy_analysis ();
	editor->ungroup ();
	editor->marksChanged ();
	editor->broadcastChange ();
	return 1;
}
static double Sound_findMaximumCorrelation (Sound me, double t1, double windowLength, double tmin2, double tmax2, double *tout, double *peak) { double maximumCorrelation = -1.0, r1 = 0.0, r2 = 0.0, r3 = 0.0, r1_best, r3_best, ir; double halfWindowLength = 0.5 * windowLength; long i1, i2, ileft2; long ileft1 = Sampled_xToNearestIndex ((Sampled) me, t1 - halfWindowLength); long iright1 = Sampled_xToNearestIndex ((Sampled) me, t1 + halfWindowLength); long ileft2min = Sampled_xToLowIndex ((Sampled) me, tmin2 - halfWindowLength); long ileft2max = Sampled_xToHighIndex ((Sampled) me, tmax2 - halfWindowLength); *peak = 0.0; /* Default. */ for (ileft2 = ileft2min; ileft2 <= ileft2max; ileft2 ++) { double norm1 = 0.0, norm2 = 0.0, product = 0.0, localPeak = 0.0; if (my ny == 1) { for (i1 = ileft1, i2 = ileft2; i1 <= iright1; i1 ++, i2 ++) { if (i1 < 1 || i1 > my nx || i2 < 1 || i2 > my nx) continue; double amp1 = my z [1] [i1], amp2 = my z [1] [i2]; norm1 += amp1 * amp1; norm2 += amp2 * amp2; product += amp1 * amp2; if (fabs (amp2) > localPeak) localPeak = fabs (amp2); } } else { for (i1 = ileft1, i2 = ileft2; i1 <= iright1; i1 ++, i2 ++) { if (i1 < 1 || i1 > my nx || i2 < 1 || i2 > my nx) continue; double amp1 = 0.5 * (my z [1] [i1] + my z [2] [i1]), amp2 = 0.5 * (my z [1] [i2] + my z [2] [i2]); norm1 += amp1 * amp1; norm2 += amp2 * amp2; product += amp1 * amp2; if (fabs (amp2) > localPeak) localPeak = fabs (amp2); } } r1 = r2; r2 = r3; r3 = product ? product / (sqrt (norm1 * norm2)) : 0.0; if (r2 > maximumCorrelation && r2 >= r1 && r2 >= r3) { r1_best = r1; maximumCorrelation = r2; r3_best = r3; ir = ileft2 - 1; *peak = localPeak; } } /* * Improve the result by means of parabolic interpolation. */ if (maximumCorrelation > -1.0) { double d2r = 2 * maximumCorrelation - r1_best - r3_best; if (d2r != 0.0) { double dr = 0.5 * (r3_best - r1_best); maximumCorrelation += 0.5 * dr * dr / d2r; ir += dr / d2r; } *tout = t1 + (ir - ileft1) * my dx; } return maximumCorrelation; }
/*
	Short-term spectral analysis of channel 1 of `me`.

	For every analysis frame, an (unwindowed) stretch with an even number of samples,
	centred on the frame time, is copied out of the sound and converted with
	Sound_to_Spectrum. For every frequency bin the power (re^2 + im^2) is stored
	in `z` and the phase (atan2 (im, re), in [-pi, +pi]) in `phase`. For the first
	and the last bin only the real part is used and the phase is set to 0.0.

	Throws if the sound is shorter than `windowLength` or if the window contains
	fewer than two samples.
*/
autoComplexSpectrogram Sound_to_ComplexSpectrogram (Sound me, double windowLength, double timeStep) {
	try {
		const double samplingFrequency = 1.0 / my dx;
		const double soundDuration = my xmax - my xmin;
		if (windowLength > soundDuration) {
			Melder_throw (U"Your sound is too short:\nit should be at least as long as one window length.");
		}
		/*
			Make the number of samples in the window even.
		*/
		const long halfWindowSamples = ((long) floor (windowLength / my dx)) / 2 - 1;
		const long windowSamples = halfWindowSamples * 2;
		if (windowSamples < 2) {
			Melder_throw (U"Your analysis window is too short: less than two samples.");
		}

		long numberOfFrames;
		double t1;
		Sampled_shortTermAnalysis (me, windowLength, timeStep, &numberOfFrames, &t1);

		/*
			Compute the sampling of the spectrum.
		*/
		const long numberOfFrequencies = halfWindowSamples + 1;
		// NOTE(review): the bins run from 0 to 0.5 * samplingFrequency in
		// (numberOfFrequencies - 1) steps, so this step looks twice as large as the
		// bin spacing - confirm against the users of ComplexSpectrogram before changing.
		const double df = samplingFrequency / (numberOfFrequencies - 1);
		autoComplexSpectrogram thee = ComplexSpectrogram_create (my xmin, my xmax, numberOfFrames, timeStep, t1,
			0.0, 0.5 * samplingFrequency, numberOfFrequencies, df, 0.0);

		autoSound analysisWindow = Sound_create (1, 0.0, windowSamples * my dx, windowSamples, my dx, 0.5 * my dx);
		for (long iframe = 1; iframe <= numberOfFrames; iframe ++) {
			const double frameTime = Sampled_indexToX (thee.get(), iframe);
			const long leftSample = Sampled_xToLowIndex (me, frameTime);
			const long rightSample = leftSample + 1;
			const long startSample = rightSample - halfWindowSamples;
			const long endSample = leftSample + halfWindowSamples;
			Melder_assert (startSample >= 1);
			Melder_assert (endSample <= my nx);

			/*
				Copy the samples of this frame; no window function is applied.
			*/
			for (long isamp = 1; isamp <= windowSamples; isamp ++) {
				analysisWindow -> z [1] [isamp] = my z [1] [startSample - 1 + isamp];
			}
			autoSpectrum spec = Sound_to_Spectrum (analysisWindow.get(), 0);

			/*
				First bin: real part only.
			*/
			const double firstReal = spec -> z [1] [1];
			thy z [1] [iframe] = firstReal * firstReal;
			thy phase [1] [iframe] = 0.0;

			for (long ifreq = 2; ifreq <= numberOfFrequencies - 1; ifreq ++) {
				const double re = spec -> z [1] [ifreq];
				const double im = spec -> z [2] [ifreq];
				thy z [ifreq] [iframe] = re * re + im * im;   // power
				thy phase [ifreq] [iframe] = atan2 (im, re);   // phase [-pi,+pi]
			}

			/*
				Last bin (even number of samples): real part only.
			*/
			const double lastReal = spec -> z [1] [numberOfFrequencies];
			thy z [numberOfFrequencies] [iframe] = lastReal * lastReal;
			thy phase [numberOfFrequencies] [iframe] = 0.0;
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no ComplexSpectrogram created.");
	}
}
/*
	Returns the time of an extremum of the sound between `tmin` and `tmax`,
	as located by findExtremum_3 on channel 1 (and channel 2, if present).
	`includeMaxima` and `includeMinima` are passed on to findExtremum_3.
	If findExtremum_3 reports 0, the midpoint (tmin + tmax) / 2 is returned.

	Both times must be defined. This is asserted before the times are converted
	to sample indices: the assertion used to come after the conversion, so an
	undefined time had already been turned into an integer index before it
	could be caught.
*/
static double Sound_findExtremum (Sound me, double tmin, double tmax, int includeMaxima, int includeMinima) {
	long imin, imax;
	double iextremum;
	Melder_assert (NUMdefined (tmin));
	Melder_assert (NUMdefined (tmax));
	imin = Sampled_xToLowIndex (me, tmin);
	imax = Sampled_xToHighIndex (me, tmax);
	if (imin < 1) imin = 1;
	if (imax > my nx) imax = my nx;
	/*
		findExtremum_3 gets the offset (imin - 1) and the number of samples in the window;
		the returned position is relative to that offset, as the conversion to time below shows.
	*/
	iextremum = findExtremum_3 (my z [1], my ny > 1 ? my z [2] : NULL, imin - 1, imax - imin + 1, includeMaxima, includeMinima);
	if (iextremum)
		return my x1 + (imin - 1 + iextremum - 1) * my dx;
	else
		return (tmin + tmax) / 2;
}
/*
	Converts a sound into a Cochleagram.

	The sound is cut into frames that are `dt` apart. Each frame contains an even
	number of samples (derived from `dt_window`), is averaged over channels 1 and 2
	if the sound has more than one channel, is multiplied by a raised-cosine window,
	and is converted via Sound_to_Spectrum and Spectrum_to_Excitation (with step `df`).
	Every frame also receives the previous frame multiplied by
	exp (-dt / forwardMaskingTime) (no carry-over if forwardMaskingTime <= 0);
	at the end all values are scaled by (1 - that factor).

	Returns an empty autoCochleagram if fewer than two frames fit.

	Changes with respect to the previous version:
	- `endSample` is now the last sample that the copy loop actually reads
	  (leftSample + halfnsamp_window) instead of one sample beyond it;
	- samples beyond the end of the sound are treated as zero instead of being read
	  (clamping `endSample` used to have no effect on the copy loop);
	- the diagnostic for a too large end sample no longer says "too small".
*/
autoCochleagram Sound_to_Cochleagram (Sound me, double dt, double df, double dt_window, double forwardMaskingTime) {
	try {
		double duration = my nx * my dx;
		long nFrames = 1 + (long) floor ((duration - dt_window) / dt);
		long nsamp_window = (long) floor (dt_window / my dx), halfnsamp_window = nsamp_window / 2 - 1;
		long nf = lround (25.6 / df);
		double dampingFactor = forwardMaskingTime > 0.0 ? exp (- dt / forwardMaskingTime) : 0.0;   // default 30 ms
		double integrationCorrection = 1.0 - dampingFactor;
		nsamp_window = halfnsamp_window * 2;   // make the window length even
		if (nFrames < 2)
			return autoCochleagram ();
		double t1 = my x1 + 0.5 * (duration - my dx - (nFrames - 1) * dt);   // centre of first frame
		autoCochleagram thee = Cochleagram_create (my xmin, my xmax, nFrames, dt, t1, df, nf);
		autoSound window = Sound_createSimple (1, nsamp_window * my dx, 1.0 / my dx);
		for (long iframe = 1; iframe <= nFrames; iframe ++) {
			double t = Sampled_indexToX (thee.get(), iframe);
			long leftSample = Sampled_xToLowIndex (me, t);
			long rightSample = leftSample + 1;
			long startSample = rightSample - halfnsamp_window;
			long endSample = leftSample + halfnsamp_window;   // == startSample - 1 + nsamp_window
			if (startSample < 1) {
				Melder_casual (U"Start sample too small: ", startSample, U" instead of 1.");
				startSample = 1;
			}
			if (endSample > my nx) {
				Melder_casual (U"End sample too large: ", endSample, U" instead of ", my nx, U".");
				endSample = my nx;
			}
			/*
				Copy a window to a frame.
				Positions beyond `endSample` (i.e. beyond the end of the sound) become zero.
			*/
			for (long i = 1; i <= nsamp_window; i ++) {
				const long isamp = i + startSample - 1;
				double value = 0.0;
				if (isamp <= endSample)
					value = ( my ny == 1 ? my z [1] [isamp] : 0.5 * (my z [1] [isamp] + my z [2] [isamp]) );
				window -> z [1] [i] = value * (0.5 - 0.5 * cos (2.0 * NUMpi * i / (nsamp_window + 1)));
			}
			autoSpectrum spec = Sound_to_Spectrum (window.get(), true);
			autoExcitation excitation = Spectrum_to_Excitation (spec.get(), df);
			/*
				Add the damped contribution of the previous frame.
			*/
			for (long ifreq = 1; ifreq <= nf; ifreq ++)
				thy z [ifreq] [iframe] = excitation -> z [1] [ifreq] +
					( iframe > 1 ? dampingFactor * thy z [ifreq] [iframe - 1] : 0 );
		}
		for (long iframe = 1; iframe <= nFrames; iframe ++)
			for (long ifreq = 1; ifreq <= nf; ifreq ++)
				thy z [ifreq] [iframe] *= integrationCorrection;
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": not converted to Cochleagram.");
	}
}
/*
	Converts the LPC frame at `time` into a VocalTract of the given `length`.
	The frame is the one at or just below `time`, limited to the range [1, nx].
*/
autoVocalTract LPC_to_VocalTract (LPC me, double time, double length) {
	try {
		// ppgb: BUG? Is rounding down the correct thing to do?
		long frameNumber = Sampled_xToLowIndex (me, time);
		frameNumber = ( frameNumber < 1 ? 1 : frameNumber );
		frameNumber = ( frameNumber > my nx ? my nx : frameNumber );
		LPC_Frame frame = & my d_frames [frameNumber];
		autoVocalTract result = LPC_Frame_to_VocalTract (frame, length);
		return result;
	} catch (MelderError) {
		Melder_throw (me, U": no VocalTract created.");
	}
}
/*
	Extracts the frame at `time` as a Spectrum.
	The frame is the one at or just below `time`, limited to the range [1, nx].
	For every frequency bin, the stored value `z` and the stored `phase` are
	converted into a real part sqrt (z) * cos (phase) and an imaginary part
	sqrt (z) * sin (phase).
*/
autoSpectrum ComplexSpectrogram_to_Spectrum (ComplexSpectrogram me, double time) {
	try {
		// ppgb: do not use Sampled_xToIndex for integers (always make the rounding explicit)
		long frameNumber = Sampled_xToLowIndex (me, time);
		if (frameNumber < 1) {
			frameNumber = 1;
		} else if (frameNumber > my nx) {
			frameNumber = my nx;
		}
		autoSpectrum thee = Spectrum_create (my ymax, my ny);
		for (long ibin = 1; ibin <= my ny; ibin ++) {
			const double magnitude = sqrt (my z [ibin] [frameNumber]);
			const double angle = my phase [ibin] [frameNumber];
			thy z [1] [ibin] = magnitude * cos (angle);
			thy z [2] [ibin] = magnitude * sin (angle);
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no Spectrum created.");
	}
}
/*
	Converts the LPC frame at `time` into a VocalTract whose length is determined
	by VocalTract_and_LPC_Frame_getMatchingLength with the given damping settings.
	The frame is the one at or just below `time`, limited to the range [1, nx].
	The vocal tract is first created with a length of 0.17 and then given the
	matching length.
*/
autoVocalTract LPC_to_VocalTract (LPC me, double time, double glottalDamping, bool radiationDamping, bool internalDamping) {
	try {
		// ppgb: BUG? Is rounding down the correct thing to do? not nearestIndex?
		long frameNumber = Sampled_xToLowIndex (me, time);
		frameNumber = ( frameNumber < 1 ? 1 : frameNumber );
		frameNumber = ( frameNumber > my nx ? my nx : frameNumber );
		LPC_Frame frame = & my d_frames [frameNumber];
		autoVocalTract thee = LPC_Frame_to_VocalTract (frame, 0.17);
		const double matchingLength = VocalTract_and_LPC_Frame_getMatchingLength (thee.peek(), frame,
			glottalDamping, radiationDamping, internalDamping);
		VocalTract_setLength (thee.peek(), matchingLength);
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no VocalTract created.");
	}
}
void SPINET_drawSpectrum (SPINET me, Graphics g, double time, double fromErb, double toErb, double minimum, double maximum, int enhanced, int garnish) { long ifmin, ifmax, icol = Sampled_xToLowIndex (me, time); // ppgb: don't use Sampled2_xToColumn for integer rounding double **z = enhanced ? my s : my y; if (icol < 1 || icol > my nx) { return; } if (toErb <= fromErb) { fromErb = my ymin; toErb = my ymax; } SampledXY_getWindowSamplesY (me, fromErb, toErb, &ifmin, &ifmax); autoNUMvector<double> spec (1, my ny); for (long i = 1; i <= my ny; i++) { spec[i] = z[i][icol]; } if (maximum <= minimum) { NUMvector_extrema (spec.peek(), ifmin, ifmax, &minimum, &maximum); } if (maximum <= minimum) { minimum -= 1; maximum += 1; } for (long i = ifmin; i <= ifmax; i++) { if (spec[i] > maximum) { spec[i] = maximum; } else if (spec[i] < minimum) { spec[i] = minimum; } } Graphics_setInner (g); Graphics_setWindow (g, fromErb, toErb, minimum, maximum); Graphics_function (g, spec.peek(), ifmin, ifmax, SampledXY_indexToY (me, ifmin), SampledXY_indexToY (me, ifmax)); Graphics_unsetInner (g); if (garnish) { Graphics_drawInnerBox (g); Graphics_textBottom (g, true, U"Frequency (ERB)"); Graphics_marksBottom (g, 2, true, true, false); Graphics_textLeft (g, true, U"strength"); Graphics_marksLeft (g, 2, true, true, false); } }
/*
	Menu callback ("Unvoice"): for every pitch frame inside the selection,
	each candidate whose frequency is 0.0 is swapped with candidate 1.
	The frames run from the one at or just above the start of the selection
	to the one at or just below its end, limited to [1, nx].
	The editor state is saved first; afterwards the editor is redrawn and
	the change is broadcast.
*/
static void menu_cb_voiceless (EDITOR_ARGS) {
	EDITOR_IAM (PitchEditor);
	Pitch pitch = (Pitch) my data;
	long firstFrame = Sampled_xToHighIndex (pitch, my d_startSelection);
	long lastFrame = Sampled_xToLowIndex (pitch, my d_endSelection);
	if (firstFrame < 1)
		firstFrame = 1;
	if (lastFrame > pitch -> nx)
		lastFrame = pitch -> nx;
	Editor_save (me, L"Unvoice");
	for (long iframe = firstFrame; iframe <= lastFrame; iframe ++) {
		Pitch_Frame frame = & pitch -> frame [iframe];
		for (long icand = 1; icand <= frame -> nCandidates; icand ++) {
			if (frame -> candidate [icand]. frequency != 0.0)
				continue;
			/*
				Swap this candidate with the first one. The scan is not stopped here,
				so a later candidate with frequency 0.0 is swapped to the front as well.
			*/
			struct structPitch_Candidate formerFirst = frame -> candidate [1];
			frame -> candidate [1] = frame -> candidate [icand];
			frame -> candidate [icand] = formerFirst;
		}
	}
	FunctionEditor_redraw (me);
	my broadcastDataChanged ();
}
/*
	Filters the sound `thee` with the LPC `me` and returns the result as a new Sound.

	If the sampling period of the sound differs from that of the LPC,
	the sound is resampled first. Within the overlap of the two time domains,
	every sample i is updated in place as
		x [i] -= a [j] * x [i - j],   j = 1 .. min (nCoefficients, i - 1)
	with the coefficients of the LPC frame nearest to the time of the sample.
	Samples outside the overlap are set to zero.

	If `useGain` is set, the samples are additionally multiplied by the square root
	of the gain, linearly interpolated between the two surrounding frames:
	the gain is used as a constant amplitude multiplier within a frame of duration my dx.
	Future alternative: convolve the gain with a smoother.

	Throws if the domains of the sound and the LPC do not overlap.
*/
autoSound LPC_and_Sound_filter (LPC me, Sound thee, int useGain) {
	try {
		double xmin = my xmin > thy xmin ? my xmin : thy xmin;
		double xmax = my xmax < thy xmax ? my xmax : thy xmax;
		if (xmin >= xmax) {
			Melder_throw (U"Domains of Sound [", thy xmin, U",", thy xmax, U"] and LPC [",
				my xmin, U",", my xmax, U"] do not overlap.");
		}
		/*
			Resample the sound if the samplings don't match.
			`thee` itself is left alone: the resampled copy is owned by an object
			that lives inside this try block and is therefore destroyed before the
			catch handler below runs, so the handler must not see a pointer to it.
		*/
		autoSound resampled;
		Sound input = thee;   // the sound that is actually filtered
		if (my samplingPeriod != thy dx) {
			resampled = Sound_resample (thee, 1.0 / my samplingPeriod, 50);
			input = resampled.get();   // reference copy
		}

		autoSound him = Data_copy (input);

		double *x = his z[1];
		long ifirst = Sampled_xToHighIndex (input, xmin);
		long ilast = Sampled_xToLowIndex (input, xmax);
		for (long i = ifirst; i <= ilast; i++) {
			double t = his x1 + (i - 1) * his dx;   /* Sampled_indexToX (him, i) */
			long iFrame = lround ( (t - my x1) / my dx + 1.0);   /* Sampled_xToNearestIndex (me, t) */
			if (iFrame < 1) {
				continue;
			}
			if (iFrame > my nx) {
				break;
			}
			double *a = my d_frames[iFrame].a;
			// never reach back beyond the first sample
			long m = i > my d_frames[iFrame].nCoefficients ? my d_frames[iFrame].nCoefficients : i - 1;
			for (long j = 1; j <= m; j++) {
				x[i] -= a[j] * x[i - j];
			}
		}

		// Make samples before first frame and after last frame zero.

		for (long i = 1; i < ifirst; i++) {
			x[i] = 0.0;
		}

		for (long i = ilast + 1; i <= his nx; i++) {
			x[i] = 0.0;
		}
		if (useGain) {
			for (long i = ifirst; i <= ilast; i++) {
				double t = his x1 + (i - 1) * his dx;   /* Sampled_indexToX (him, i) */
				double riFrame = (t - my x1) / my dx + 1;   /* Sampled_xToIndex (me, t); */
				long iFrame = (long) floor (riFrame);
				double phase = riFrame - iFrame;   // position between frame iFrame and frame iFrame + 1
				if (iFrame < 0 || iFrame > my nx) {
					x[i] = 0.0;
				} else if (iFrame == 0) {
					x[i] *= sqrt (my d_frames[1].gain) * phase;
				} else if (iFrame == my nx) {
					x[i] *= sqrt (my d_frames[my nx].gain) * (1.0 - phase);
				} else
					x[i] *= phase * sqrt (my d_frames[iFrame + 1].gain) + (1.0 - phase) * sqrt (my d_frames[iFrame].gain);
			}
		}
		return him;
	} catch (MelderError) {
		Melder_throw (thee, U": not filtered.");
	}
}
/*
	Short-term spectral (power) analysis of a sound.

	The time step is the larger of `minimumTimeStep1` and the effective time width
	divided by `maximumTimeOversampling`; the frequency step is the larger of
	`minimumFreqStep1` and the effective frequency width divided by
	`maximumFreqOversampling`. For a Gaussian window, the physical window is twice
	as long as `effectiveAnalysisWidth`.

	For every frame and every channel, the windowed samples are zero-padded to a
	power-of-two length and transformed with NUMfft_forward; the power spectra are
	summed over channels (and divided by the number of channels if there is more
	than one), then summed into bands of `binWidth_samples` FFT bins and scaled.

	Returns an empty autoSpectrogram if no frequency band fits.
	Throws if the analysis window is too short or the sound is shorter than the window.
*/
autoSpectrogram Sound_to_Spectrogram (Sound me, double effectiveAnalysisWidth, double fmax,
	double minimumTimeStep1, double minimumFreqStep1, enum kSound_to_Spectrogram_windowShape windowType,
	double maximumTimeOversampling, double maximumFreqOversampling)
{
	try {
		const bool gaussian = ( windowType == kSound_to_Spectrogram_windowShape_GAUSSIAN );
		const double nyquist = 0.5 / my dx;
		const double physicalAnalysisWidth = gaussian ? 2 * effectiveAnalysisWidth : effectiveAnalysisWidth;
		const double effectiveTimeWidth = effectiveAnalysisWidth / sqrt (NUMpi);
		const double effectiveFreqWidth = 1 / effectiveTimeWidth;
		const double minimumTimeStep2 = effectiveTimeWidth / maximumTimeOversampling;
		const double minimumFreqStep2 = effectiveFreqWidth / maximumFreqOversampling;
		const double timeStep = minimumTimeStep1 > minimumTimeStep2 ? minimumTimeStep1 : minimumTimeStep2;
		double freqStep = minimumFreqStep1 > minimumFreqStep2 ? minimumFreqStep1 : minimumFreqStep2;
		const double duration = my dx * (double) my nx;
		double sumOfSquaredWindowValues = 0.0;

		/*
			Compute the time sampling. The number of samples in the window is made even.
		*/
		const long halfnsamp_window = ((long) floor (physicalAnalysisWidth / my dx)) / 2 - 1;
		const long nsamp_window = halfnsamp_window * 2;
		if (nsamp_window < 1)
			Melder_throw (U"Your analysis window is too short: less than two samples.");
		if (physicalAnalysisWidth > duration)
			Melder_throw (U"Your sound is too short:\n"
				U"it should be at least as long as ",
				windowType == kSound_to_Spectrogram_windowShape_GAUSSIAN ? U"two window lengths." : U"one window length.");
		const long numberOfTimes = 1 + (long) floor ((duration - physicalAnalysisWidth) / timeStep);   // >= 1
		const double t1 = my x1 + 0.5 * ((double) (my nx - 1) * my dx - (double) (numberOfTimes - 1) * timeStep);
			/* Centre of first frame. */

		/*
			Compute the frequency sampling of the FFT spectrum.
		*/
		if (fmax <= 0.0 || fmax > nyquist)
			fmax = nyquist;
		long numberOfFreqs = (long) floor (fmax / freqStep);
		if (numberOfFreqs < 1)
			return autoSpectrogram ();
		long nsampFFT = 1;
		while (nsampFFT < nsamp_window || nsampFFT < 2 * numberOfFreqs * (nyquist / fmax))
			nsampFFT *= 2;
		const long half_nsampFFT = nsampFFT / 2;

		/*
			Compute the frequency sampling of the spectrogram:
			every band is a whole number of FFT bins wide.
		*/
		long binWidth_samples = (long) floor (freqStep * my dx * nsampFFT);
		if (binWidth_samples < 1)
			binWidth_samples = 1;
		const double binWidth_hertz = 1.0 / (my dx * nsampFFT);
		freqStep = binWidth_samples * binWidth_hertz;
		numberOfFreqs = (long) floor (fmax / freqStep);
		if (numberOfFreqs < 1)
			return autoSpectrogram ();

		autoSpectrogram thee = Spectrogram_create (my xmin, my xmax, numberOfTimes, timeStep, t1,
			0.0, fmax, numberOfFreqs, freqStep, 0.5 * (freqStep - binWidth_hertz));

		autoNUMvector <double> frame (1, nsampFFT);
		autoNUMvector <double> spec (1, nsampFFT);
		autoNUMvector <double> window (1, nsamp_window);
		autoNUMfft_Table fftTable;
		NUMfft_Table_init (& fftTable, nsampFFT);

		autoMelderProgress progress (U"Sound to Spectrogram...");

		/*
			Tabulate the window function. The values are truncated to single precision
			when stored, but the sum of squares uses the untruncated values.
		*/
		const double nSamplesPerWindow_f = physicalAnalysisWidth / my dx;
		for (long isamp = 1; isamp <= nsamp_window; isamp ++) {
			double phase = (double) isamp / nSamplesPerWindow_f;   // 0 .. 1
			double value;
			switch (windowType) {
				case kSound_to_Spectrogram_windowShape_SQUARE:
					value = 1.0;
					break;
				case kSound_to_Spectrogram_windowShape_HAMMING:
					value = 0.54 - 0.46 * cos (2.0 * NUMpi * phase);
					break;
				case kSound_to_Spectrogram_windowShape_BARTLETT:
					value = 1.0 - fabs ((2.0 * phase - 1.0));
					break;
				case kSound_to_Spectrogram_windowShape_WELCH:
					value = 1.0 - (2.0 * phase - 1.0) * (2.0 * phase - 1.0);
					break;
				case kSound_to_Spectrogram_windowShape_HANNING:
					value = 0.5 * (1.0 - cos (2.0 * NUMpi * phase));
					break;
				case kSound_to_Spectrogram_windowShape_GAUSSIAN: {
					const double imid = 0.5 * (double) (nsamp_window + 1);
					const double edge = exp (-12.0);
					phase = ((double) isamp - imid) / nSamplesPerWindow_f;   /* -0.5 .. +0.5 */
					value = (exp (-48.0 * phase * phase) - edge) / (1.0 - edge);
					break;
				}
				default:
					value = 1.0;
			}
			window [isamp] = (float) value;
			sumOfSquaredWindowValues += value * value;
		}
		const double oneByBinWidth = 1.0 / sumOfSquaredWindowValues / binWidth_samples;

		for (long iframe = 1; iframe <= numberOfTimes; iframe ++) {
			const double frameTime = Sampled_indexToX (thee.peek(), iframe);
			const long leftSample = Sampled_xToLowIndex (me, frameTime);
			const long rightSample = leftSample + 1;
			const long startSample = rightSample - halfnsamp_window;
			const long endSample = leftSample + halfnsamp_window;
			Melder_assert (startSample >= 1);
			Melder_assert (endSample <= my nx);

			// NOTE(review): element half_nsampFFT + 1 is accumulated below but is not
			// included in this reset (nor in the division by the number of channels).
			for (long ibin = 1; ibin <= half_nsampFFT; ibin ++) {
				spec [ibin] = 0.0;
			}

			for (long channel = 1; channel <= my ny; channel ++) {
				/*
					Windowed samples, followed by zero padding.
				*/
				for (long j = 1; j <= nsamp_window; j ++) {
					frame [j] = my z [channel] [startSample - 1 + j] * window [j];
				}
				for (long j = nsamp_window + 1; j <= nsampFFT; j ++) {
					frame [j] = 0.0f;
				}

				Melder_progress (iframe / (numberOfTimes + 1.0),
					U"Sound to Spectrogram: analysis of frame ", iframe, U" out of ", numberOfTimes);

				/*
					Compute Fast Fourier Transform of the frame.
				*/
				NUMfft_forward (& fftTable, frame.peek());   // complex spectrum

				/*
					Add the power spectrum of this channel to spec [1..half_nsampFFT + 1].
				*/
				spec [1] += frame [1] * frame [1];   // DC component
				for (long ibin = 2; ibin <= half_nsampFFT; ibin ++) {
					const long ire = ibin + ibin - 2, iim = ibin + ibin - 1;
					spec [ibin] += frame [ire] * frame [ire] + frame [iim] * frame [iim];
				}
				spec [half_nsampFFT + 1] += frame [nsampFFT] * frame [nsampFFT];   // Nyquist frequency. Correct??
			}
			if (my ny > 1) {
				for (long ibin = 1; ibin <= half_nsampFFT; ibin ++) {
					spec [ibin] /= my ny;
				}
			}

			/*
				Bin into the bands of the spectrogram.
				The band power is accumulated in single precision.
			*/
			for (long iband = 1; iband <= numberOfFreqs; iband ++) {
				const long firstBin = (iband - 1) * binWidth_samples + 1;
				const long endBin = firstBin + binWidth_samples;   // one past the last bin of the band
				float power = 0.0f;
				for (long ibin = firstBin; ibin < endBin; ibin ++)
					power += spec [ibin];
				thy z [iband] [iframe] = power * oneByBinWidth;
			}
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": spectrogram analysis not performed.");
	}
}
static void Sound_into_PitchFrame (Sound me, Pitch_Frame pitchFrame, double t, double minimumPitch, int maxnCandidates, int method, double voicingThreshold, double octaveCost, NUMfft_Table fftTable, double dt_window, long nsamp_window, long halfnsamp_window, long maximumLag, long nsampFFT, long nsamp_period, long halfnsamp_period, long brent_ixmax, long brent_depth, double globalPeak, double **frame, double *ac, double *window, double *windowR, double *r, long *imax, double *localMean) { double localPeak; long leftSample = Sampled_xToLowIndex (me, t), rightSample = leftSample + 1; long startSample, endSample; for (long channel = 1; channel <= my ny; channel ++) { /* * Compute the local mean; look one longest period to both sides. */ startSample = rightSample - nsamp_period; endSample = leftSample + nsamp_period; Melder_assert (startSample >= 1); Melder_assert (endSample <= my nx); localMean [channel] = 0.0; for (long i = startSample; i <= endSample; i ++) { localMean [channel] += my z [channel] [i]; } localMean [channel] /= 2 * nsamp_period; /* * Copy a window to a frame and subtract the local mean. * We are going to kill the DC component before windowing. */ startSample = rightSample - halfnsamp_window; endSample = leftSample + halfnsamp_window; Melder_assert (startSample >= 1); Melder_assert (endSample <= my nx); if (method < FCC_NORMAL) { for (long j = 1, i = startSample; j <= nsamp_window; j ++) frame [channel] [j] = (my z [channel] [i ++] - localMean [channel]) * window [j]; for (long j = nsamp_window + 1; j <= nsampFFT; j ++) frame [channel] [j] = 0.0; } else { for (long j = 1, i = startSample; j <= nsamp_window; j ++) frame [channel] [j] = my z [channel] [i ++] - localMean [channel]; } } /* * Compute the local peak; look half a longest period to both sides. 
*/ localPeak = 0.0; if ((startSample = halfnsamp_window + 1 - halfnsamp_period) < 1) startSample = 1; if ((endSample = halfnsamp_window + halfnsamp_period) > nsamp_window) endSample = nsamp_window; for (long channel = 1; channel <= my ny; channel ++) { for (long j = startSample; j <= endSample; j ++) { double value = fabs (frame [channel] [j]); if (value > localPeak) localPeak = value; } } pitchFrame->intensity = localPeak > globalPeak ? 1.0 : localPeak / globalPeak; /* * Compute the correlation into the array 'r'. */ if (method >= FCC_NORMAL) { double startTime = t - 0.5 * (1.0 / minimumPitch + dt_window); long localSpan = maximumLag + nsamp_window, localMaximumLag, offset; if ((startSample = Sampled_xToLowIndex (me, startTime)) < 1) startSample = 1; if (localSpan > my nx + 1 - startSample) localSpan = my nx + 1 - startSample; localMaximumLag = localSpan - nsamp_window; offset = startSample - 1; double sumx2 = 0; // sum of squares for (long channel = 1; channel <= my ny; channel ++) { double *amp = my z [channel] + offset; for (long i = 1; i <= nsamp_window; i ++) { double x = amp [i] - localMean [channel]; sumx2 += x * x; } } double sumy2 = sumx2; // at zero lag, these are still equal r [0] = 1.0; for (long i = 1; i <= localMaximumLag; i ++) { double product = 0.0; for (long channel = 1; channel <= my ny; channel ++) { double *amp = my z [channel] + offset; double y0 = amp [i] - localMean [channel]; double yZ = amp [i + nsamp_window] - localMean [channel]; sumy2 += yZ * yZ - y0 * y0; for (long j = 1; j <= nsamp_window; j ++) { double x = amp [j] - localMean [channel]; double y = amp [i + j] - localMean [channel]; product += x * y; } } r [- i] = r [i] = product / sqrt (sumx2 * sumy2); } } else { /* * The FFT of the autocorrelation is the power spectrum. 
*/ for (long i = 1; i <= nsampFFT; i ++) { ac [i] = 0.0; } for (long channel = 1; channel <= my ny; channel ++) { NUMfft_forward (fftTable, frame [channel]); // complex spectrum ac [1] += frame [channel] [1] * frame [channel] [1]; // DC component for (long i = 2; i < nsampFFT; i += 2) { ac [i] += frame [channel] [i] * frame [channel] [i] + frame [channel] [i+1] * frame [channel] [i+1]; // power spectrum } ac [nsampFFT] += frame [channel] [nsampFFT] * frame [channel] [nsampFFT]; // Nyquist frequency } NUMfft_backward (fftTable, ac); /* Autocorrelation. */ /* * Normalize the autocorrelation to the value with zero lag, * and divide it by the normalized autocorrelation of the window. */ r [0] = 1.0; for (long i = 1; i <= brent_ixmax; i ++) r [- i] = r [i] = ac [i + 1] / (ac [1] * windowR [i + 1]); } /* * Register the first candidate, which is always present: voicelessness. */ pitchFrame->nCandidates = 1; pitchFrame->candidate[1].frequency = 0.0; // voiceless: always present pitchFrame->candidate[1].strength = 0.0; /* * Shortcut: absolute silence is always voiceless. * We are done for this frame. */ if (localPeak == 0) return; /* * Find the strongest maxima of the correlation of this frame, * and register them as candidates. */ imax [1] = 0; for (long i = 2; i < maximumLag && i < brent_ixmax; i ++) if (r [i] > 0.5 * voicingThreshold && // not too unvoiced? r [i] > r [i-1] && r [i] >= r [i+1]) // maximum? { int place = 0; /* * Use parabolic interpolation for first estimate of frequency, * and sin(x)/x interpolation to compute the strength of this frequency. */ double dr = 0.5 * (r [i+1] - r [i-1]), d2r = 2 * r [i] - r [i-1] - r [i+1]; double frequencyOfMaximum = 1 / my dx / (i + dr / d2r); long offset = - brent_ixmax - 1; double strengthOfMaximum = /* method & 1 ? */ NUM_interpolate_sinc (& r [offset], brent_ixmax - offset, 1 / my dx / frequencyOfMaximum - offset, 30) /* : r [i] + 0.5 * dr * dr / d2r */; /* High values due to short windows are to be reflected around 1. 
*/ if (strengthOfMaximum > 1.0) strengthOfMaximum = 1.0 / strengthOfMaximum; /* * Find a place for this maximum. */ if (pitchFrame->nCandidates < maxnCandidates) { // is there still a free place? place = ++ pitchFrame->nCandidates; } else { /* Try the place of the weakest candidate so far. */ double weakest = 2; for (int iweak = 2; iweak <= maxnCandidates; iweak ++) { /* High frequencies are to be favoured */ /* if we want to analyze a perfectly periodic signal correctly. */ double localStrength = pitchFrame->candidate[iweak].strength - octaveCost * NUMlog2 (minimumPitch / pitchFrame->candidate[iweak].frequency); if (localStrength < weakest) { weakest = localStrength; place = iweak; } } /* If this maximum is weaker than the weakest candidate so far, give it no place. */ if (strengthOfMaximum - octaveCost * NUMlog2 (minimumPitch / frequencyOfMaximum) <= weakest) place = 0; } if (place) { // have we found a place for this candidate? pitchFrame->candidate[place].frequency = frequencyOfMaximum; pitchFrame->candidate[place].strength = strengthOfMaximum; imax [place] = i; } } /* * Second pass: for extra precision, maximize sin(x)/x interpolation ('sinc'). */ for (long i = 2; i <= pitchFrame->nCandidates; i ++) { if (method != AC_HANNING || pitchFrame->candidate[i].frequency > 0.0 / my dx) { double xmid, ymid; long offset = - brent_ixmax - 1; ymid = NUMimproveMaximum (& r [offset], brent_ixmax - offset, imax [i] - offset, pitchFrame->candidate[i].frequency > 0.3 / my dx ? NUM_PEAK_INTERPOLATE_SINC700 : brent_depth, & xmid); xmid += offset; pitchFrame->candidate[i].frequency = 1.0 / my dx / xmid; if (ymid > 1.0) ymid = 1.0 / ymid; pitchFrame->candidate[i].strength = ymid; } } }
/*
 * Resynthesize `me` by overlap-add, following the pulses in `pulses`, the pitch in `pitch`
 * and the relative durations in `duration`.
 *
 * Runs of pulses whose successive intervals do not exceed `maxT` are treated as voiced stretches ("voice");
 * everything in between is treated as voiceless ("noise"). Each stretch of the source is mapped onto
 * a target interval whose length is the area under the duration tier over the source interval.
 *
 * Throws if the duration tier has no points.
 */
autoSound Sound_Point_Pitch_Duration_to_Sound (Sound me, PointProcess pulses,
	PitchTier pitch, DurationTier duration, double maxT)
{
	try {
		if (duration -> points.size == 0)
			Melder_throw (U"No duration points.");

		/*
		 * The result is allocated three times as long as the source,
		 * so that it can hold the longest possible duration-manipulated sound.
		 */
		autoSound thee = Sound_create (1, my xmin, my xmin + 3 * (my xmax - my xmin), 3 * my nx, my dx, my x1);

		/*
		 * Find, by 15 bisection steps, the source time within [sourceStart, sourceEnd]
		 * that the duration tier maps onto `targetTime`.
		 */
		auto sourceTimeForTarget = [&] (double sourceStart, double sourceEnd, double targetStart, double targetTime) -> double {
			double lower = sourceStart, upper = sourceEnd;
			for (int step = 1; step <= 15; step ++) {
				const double middle = 0.5 * (lower + upper);
				const double mappedMiddle = targetStart + RealTier_getArea (duration, sourceStart, middle);
				if (mappedMiddle < targetTime)
					lower = middle;
				else
					upper = middle;
			}
			return 0.5 * (lower + upper);
		};

		/*
		 * Fill the target interval [targetStart, targetEnd] with bells copied from the source noise,
		 * spaced at randomly chosen "periods" between 8 and 12 ms.
		 */
		auto copyNoise = [&] (double sourceStart, double sourceEnd, double targetStart, double targetEnd) {
			double bellWidth = NUMrandomUniform (0.008, 0.012);
			double targetTime = targetStart + 0.5 * bellWidth;
			while (targetTime < targetEnd) {
				const double sourceTime = sourceTimeForTarget (sourceStart, sourceEnd, targetStart, targetTime);
				copyBell (me, sourceTime, bellWidth, bellWidth, thee.get(), targetTime);
				bellWidth = NUMrandomUniform (0.008, 0.012);
				targetTime += bellWidth;
			}
		};

		double timeShift = 0;   // accumulated difference between target and source time
		double handledTime = my xmin;   // source time up to which everything has been copied

		if (pitch && pitch -> points.size) {
			long firstPulse = 1;
			while (firstPulse <= pulses -> nt) {
				/*
				 * The voice starts half a period before its first pulse.
				 */
				double voiceSourceStart = pulses -> t [firstPulse];
				const double startingPeriod = 1.0 / RealTier_getValueAtTime (pitch, voiceSourceStart);
				voiceSourceStart -= 0.5 * startingPeriod;

				/*
				 * The noise runs from where we stopped up to the start of this voice.
				 */
				const double noiseSourceStart = handledTime;
				const double noiseSourceEnd = voiceSourceStart;
				const double noiseSourceDuration = noiseSourceEnd - noiseSourceStart;
				const double noiseTargetStart = noiseSourceStart + timeShift;
				const double noiseTargetEnd = noiseTargetStart + RealTier_getArea (duration, noiseSourceStart, noiseSourceEnd);
				const double noiseTargetDuration = noiseTargetEnd - noiseTargetStart;
				copyNoise (noiseSourceStart, noiseSourceEnd, noiseTargetStart, noiseTargetEnd);
				timeShift += noiseTargetDuration - noiseSourceDuration;

				/*
				 * The voice ends at the last pulse before an interval longer than maxT,
				 * plus half a period.
				 */
				long lastPulse = firstPulse + 1;
				while (lastPulse <= pulses -> nt && ! (pulses -> t [lastPulse] - pulses -> t [lastPulse - 1] > maxT))
					lastPulse ++;
				lastPulse --;
				double voiceSourceEnd = pulses -> t [lastPulse];
				const double finishingPeriod = 1.0 / RealTier_getValueAtTime (pitch, voiceSourceEnd);
				voiceSourceEnd += 0.5 * finishingPeriod;

				/*
				 * The voice is copied to an interval with a different location and duration.
				 */
				const double voiceSourceDuration = voiceSourceEnd - voiceSourceStart;
				const double voiceTargetStart = voiceSourceStart + timeShift;
				const double voiceTargetEnd = voiceTargetStart + RealTier_getArea (duration, voiceSourceStart, voiceSourceEnd);
				const double voiceTargetDuration = voiceTargetEnd - voiceTargetStart;

				/*
				 * Copy the voiced part, one bell per target period.
				 */
				for (double targetTime = voiceTargetStart + 0.5 * startingPeriod; targetTime < voiceTargetEnd; ) {
					const double sourceTime = sourceTimeForTarget (voiceSourceStart, voiceSourceEnd, voiceTargetStart, targetTime);
					const double period = 1.0 / RealTier_getValueAtTime (pitch, sourceTime);
					const long sourcePulse = PointProcess_getNearestIndex (pulses, sourceTime);
					copyBell2 (me, pulses, sourcePulse, period, period, thee.get(), targetTime, maxT);
					targetTime += period;
				}
				timeShift += voiceTargetDuration - voiceSourceDuration;
				handledTime = voiceSourceEnd;

				firstPulse = lastPulse + 1;
			}
		}

		/*
		 * Copy whatever voiceless part remains between the last voice and the end of the source.
		 */
		{
			const double noiseSourceStart = handledTime;
			const double noiseSourceEnd = my xmax;
			const double noiseTargetStart = noiseSourceStart + timeShift;
			const double noiseTargetEnd = noiseTargetStart + RealTier_getArea (duration, noiseSourceStart, noiseSourceEnd);
			copyNoise (noiseSourceStart, noiseSourceEnd, noiseTargetStart, noiseTargetEnd);
		}

		/*
		 * Shrink the time domain of the result to the manipulated duration.
		 */
		thy xmax = thy xmin + RealTier_getArea (duration, my xmin, my xmax);
		if (fabs (thy xmax - my xmax) < 1e-12)
			thy xmax = my xmax;   // common situation: the total duration is unchanged
		thy nx = Sampled_xToLowIndex (thee.get(), thy xmax);
		if (thy nx > 3 * my nx)
			thy nx = 3 * my nx;   // never more samples than were allocated

		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": not manipulated.");
	}
}
/*
 * Compute a PowerCepstrogram from the sound `me`.
 *
 * minimumPitch - determines the analysis window: 3 periods of this pitch, but never longer than the sound;
 * dt           - time step between successive analysis frames.
 *
 * The sound is first copied (or resampled to half its sampling frequency if that is above 30000 Hz)
 * and pre-emphasized with a fixed coefficient of 0.9; `me` itself is not modified.
 * For each frame the Hamming-windowed samples are Fourier-transformed, converted to a log power spectrum,
 * made zero-mean, and transformed back; the squared result is stored as the power cepstrum of that frame.
 *
 * Throws if the analysis window contains fewer than 8 samples.
 */
PowerCepstrogram Sound_to_PowerCepstrogram_hillenbrand (Sound me, double minimumPitch, double dt) {
	try {
		// the analysis window spans 3 periods of the lowest pitch, clipped to the duration of the sound
		double analysisWidth = 3 / minimumPitch;
		if (analysisWidth > my dx * my nx) {
			analysisWidth = my dx * my nx;
		}
		double t1, samplingFrequency = 1.0 / my dx;
		autoSound thee;
		if (samplingFrequency > 30000) {
			samplingFrequency = samplingFrequency / 2.0;
			thee.reset (Sound_resample (me, samplingFrequency, 1));
		} else {
			thee.reset (Data_copy (me));
		}
		// pre-emphasis with fixed coefficient 0.9 (run backwards so that each step sees the unmodified predecessor)
		for (long i = thy nx; i > 1; i--) {
			thy z[1][i] -= 0.9 * thy z[1][i - 1];
		}
		long nosInWindow = (long) floor (analysisWidth * samplingFrequency), nFrames;
		if (nosInWindow < 8) {
			Melder_throw (U"Analysis window too short.");
		}
		Sampled_shortTermAnalysis (thee.peek(), analysisWidth, dt, & nFrames, & t1);
		autoNUMvector<double> hamming (1, nosInWindow);
		for (long i = 1; i <= nosInWindow; i++) {
			hamming[i] = 0.54 -0.46 * cos(2 * NUMpi * (i - 1) / (nosInWindow - 1));
		}
		long nfft = 8; // minimum possible
		while (nfft < nosInWindow) {
			nfft *= 2;
		}
		long nfftdiv2 = nfft / 2;
		autoNUMvector<double> fftbuf (1, nfft); // "complex" array
		autoNUMvector<double> spectrum (1, nfftdiv2 + 1); // +1 needed
		autoNUMfft_Table fftTable;
		NUMfft_Table_init (&fftTable, nfft); // sound to spectrum

		double qmax = 0.5 * nfft / samplingFrequency, dq = qmax / (nfftdiv2 + 1);
		autoPowerCepstrogram him = PowerCepstrogram_create (my xmin, my xmax, nFrames, dt, t1, 0, qmax, nfftdiv2+1, dq, 0);

		autoMelderProgress progress (U"Cepstrogram analysis");

		for (long iframe = 1; iframe <= nFrames; iframe++) {
			double tbegin = t1 + (iframe - 1) * dt - analysisWidth / 2;
			tbegin = tbegin < thy xmin ? thy xmin : tbegin;
			long istart = Sampled_xToLowIndex (thee.peek(), tbegin);   // ppgb: rounding down?
			istart = istart < 1 ? 1 : istart;
			long iend = istart + nosInWindow - 1;
			iend = iend > thy nx ? thy nx : iend;
			/*
			 * Copy only the samples that really exist: if the window sticks out past the last sample,
			 * the remainder is zero-padded instead of being read from beyond the end of the sound.
			 */
			long nAvailable = iend - istart + 1;
			if (nAvailable < 0) {
				nAvailable = 0;
			}
			for (long i = 1; i <= nAvailable; i++) {
				fftbuf[i] = thy z[1][istart + i - 1] * hamming[i];
			}
			for (long i = nAvailable + 1; i <= nfft; i++) {
				fftbuf[i] = 0;
			}
			NUMfft_forward (&fftTable, fftbuf.peek());
			complexfftoutput_to_power (fftbuf.peek(), nfft, spectrum.peek(), true); // log10(|fft|^2)
			// subtract average
			double specmean = spectrum[1];
			for (long i = 2; i <= nfftdiv2 + 1; i++) {
				specmean += spectrum[i];
			}
			specmean /= nfftdiv2 + 1;
			for (long i = 1; i <= nfftdiv2 + 1; i++) {
				spectrum[i] -= specmean;
			}
			/*
			 * Here we diverge from Hillenbrand as he takes the fft of half of the spectral values.
			 * H. forgets that the actual spectrum has nfft/2+1 values. Therefore, we take the inverse
			 * transform because this keeps the number of samples a power of 2.
			 * At the same time this results in twice as many numbers in the quefrency domain, i.e. we end with nfft/2+1
			 * numbers while H. has only nfft/4!
			 */
			// pack the real-valued log spectrum into the buffer layout used by NUMfft_backward (imaginary parts zero)
			fftbuf[1] = spectrum[1];
			for (long i = 2; i < nfftdiv2 + 1; i++) {
				fftbuf[i+i-2] = spectrum[i];
				fftbuf[i+i-1] = 0;
			}
			fftbuf[nfft] = spectrum[nfftdiv2 + 1];
			NUMfft_backward (&fftTable, fftbuf.peek());
			for (long i = 1; i <= nfftdiv2 + 1; i++) {
				his z[i][iframe] = fftbuf[i] * fftbuf[i];
			}
			if ((iframe % 10) == 1) {
				Melder_progress ((double) iframe / nFrames, U"Cepstrogram analysis of frame ",
					iframe, U" out of ", nFrames, U".");
			}
		}
		return him.transfer();
	} catch (MelderError) {
		Melder_throw (me, U": no Cepstrogram created.");
	}
}
/*
 * Create a closed Polygon that encloses the area between the sounds `me` and `thee`
 * in the common part of their time domains.
 *
 * channel    - the channel to use; a mono sound always contributes its single channel;
 * tmin, tmax - the requested time range; if tmin >= tmax the whole overlap is used,
 *              and the range is always limited to the overlap;
 * ymin, ymax - vertical range for CLIP_Y (clipping is requested when ymin < ymax).
 *
 * The polygon runs forward along `me` from tmin to tmax and then backward along `thee` from tmax to tmin.
 * All points, including the four interpolated boundary points, go through CLIP_Y.
 *
 * Throws if the channel counts are incompatible, the channel does not exist,
 * or the time domains do not overlap.
 */
Polygon Sounds_to_Polygon_enclosed (Sound me, Sound thee, int channel, double tmin, double tmax, double ymin, double ymax) {
	try {
		bool clip = ymin < ymax;   // NOTE(review): presumably consumed by the CLIP_Y macro - confirm against its definition
		if (my ny > 1 && thy ny > 1 && my ny != thy ny) {
			Melder_throw ("The numbers of channels of the two sounds have to be equal or 1.");
		}

		long numberOfChannels = my ny > thy ny ? my ny : thy ny;
		if (channel < 1 || channel > numberOfChannels) {
			Melder_throw ("Channel does not exist.");
		}
		// find overlap in the domains with xmin workaround as in Sound_to_Polygon
		double xmin1 = my x1 - 0.5 * my dx, xmin2 = thy x1 - 0.5 * thy dx ;
		double xmin = my xmin > thy xmin ? xmin1 : xmin2;
		double xmax = my xmax < thy xmax ? xmin1 + my nx * my dx : xmin2 + thy nx * thy dx;
		if (xmax <= xmin) {
			Melder_throw ("Domains must overlap.");
		}
		if (tmin >= tmax) {
			tmin = xmin;
			tmax = xmax;
		}
		if (tmin < xmin) {
			tmin = xmin;
		}
		if (tmax > xmax) {
			tmax = xmax;
		}
		if (tmin >= xmax || tmax < xmin) {
			Melder_throw ("Invalid domain.");
		}

		// a mono sound is used as-is, whatever channel was asked for
		long myChannel = my ny == 1 ? 1 : channel;
		long thyChannel = thy ny == 1 ? 1 : channel;

		long k = 1;
		long ib1 = Sampled_xToHighIndex (me, tmin);
		long ie1 = Sampled_xToLowIndex (me, tmax);
		long n1 = ie1 - ib1 + 1;
		long ib2 = Sampled_xToHighIndex (thee, tmin);
		long ie2 = Sampled_xToLowIndex (thee, tmax);
		long n2 = ie2 - ib2 + 1;
		long numberOfPoints = n1 + n2 + 4; // my samples + thy samples + the four interpolated boundary points

		autoPolygon him = Polygon_create (numberOfPoints);

		// my starting point at tmin

		double y = Vector_getValueAtX (me, tmin, myChannel, Vector_VALUE_INTERPOLATION_LINEAR);
		his x[k] = tmin;
		his y[k++] = CLIP_Y (y, ymin, ymax);

		// my samples

		for (long i = ib1; i <= ie1; i++) {
			double t = my x1 + (i - 1) * my dx;
			y = my z[myChannel][i];
			his x[k] = t;
			his y[k++] = CLIP_Y (y, ymin, ymax);
		}

		// my end point at tmax (clipped like every other point)

		y = Vector_getValueAtX (me, tmax, myChannel, Vector_VALUE_INTERPOLATION_LINEAR);
		his x[k] = tmax;
		his y[k++] = CLIP_Y (y, ymin, ymax);

		// thy starting point at tmax

		y = Vector_getValueAtX (thee, tmax, thyChannel, Vector_VALUE_INTERPOLATION_LINEAR);
		his x[k] = tmax;
		his y[k++] = CLIP_Y (y, ymin, ymax);

		// thy samples, traversed backwards in time

		for (long i = ie2; i >= ib2; i--) {
			double t = thy x1 + (i - 1) * thy dx;
			y = thy z[thyChannel][i];
			his x[k] = t;
			his y[k++] = CLIP_Y (y, ymin, ymax);
		}

		// thy end point at tmin

		y = Vector_getValueAtX (thee, tmin, thyChannel, Vector_VALUE_INTERPOLATION_LINEAR);
		his x[k] = tmin;
		his y[k] = CLIP_Y (y, ymin, ymax);

		Melder_assert (k == numberOfPoints);
		return him.transfer();
	} catch (MelderError) {
		Melder_throw (me, ": no enclosed Polygon created.");
	}
}
/*
 * Formant analysis that works in place: the sound `me` is pre-emphasized by this function,
 * so callers that need the original samples must pass a copy.
 *
 * dt_in                - time step; if not positive, a quarter of halfdt_window is used;
 * numberOfPoles        - order of the linear-prediction analysis; (numberOfPoles + 1) / 2 formants at most;
 * halfdt_window        - half the duration of the analysis window;
 * which                - 1: analyse each frame with burg(); 2: with splitLevinson();
 * preemphasisFrequency - passed to Sound_preEmphasis();
 * safetyMargin         - passed to burg().
 *
 * Throws if the window has fewer than numberOfPoles + 1 samples or if the sound contains infinities.
 */
static autoFormant Sound_to_Formant_any_inline (Sound me, double dt_in, int numberOfPoles,
	double halfdt_window, int which, double preemphasisFrequency, double safetyMargin)
{
	const double duration = my nx * my dx;
	double timeStep = halfdt_window / 4.0;
	if (dt_in > 0.0)
		timeStep = dt_in;
	double windowDuration = 2.0 * halfdt_window;
	long numberOfFrames = 1 + (long) floor ((duration - windowDuration) / timeStep);
	long windowSamples = (long) floor (windowDuration / my dx);
	const long halfWindowSamples = windowSamples / 2;

	if (windowSamples < numberOfPoles + 1)
		Melder_throw (U"Window too short.");

	double firstFrameTime;
	if (numberOfFrames < 1) {
		/*
		 * The sound is shorter than the window: analyse it as a single frame that covers everything.
		 */
		numberOfFrames = 1;
		firstFrameTime = my x1 + 0.5 * duration;
		windowDuration = duration;
		windowSamples = my nx;
	} else {
		firstFrameTime = my x1 + 0.5 * (duration - my dx - (numberOfFrames - 1) * timeStep);   // centre of first frame
	}

	autoFormant thee = Formant_create (my xmin, my xmax, numberOfFrames, timeStep, firstFrameTime,
		(numberOfPoles + 1) / 2);   // e.g. 11 poles -> maximally 6 formants
	autoNUMvector <double> window (1, windowSamples);
	autoNUMvector <double> frame (1, windowSamples);
	autoNUMvector <double> cof (1, numberOfPoles);   // only needed by burg(), i.e. if which == 1

	autoMelderProgress progress (U"Formant analysis...");

	/* Pre-emphasis (modifies the sound itself). */
	Sound_preEmphasis (me, preemphasisFrequency);

	/* Gaussian window, shifted and scaled so that it reaches zero at its edges. */
	const double windowMid = 0.5 * (windowSamples + 1), edge = exp (-12.0);
	for (long i = 1; i <= windowSamples; i ++) {
		window [i] = (exp (-48.0 * (i - windowMid) * (i - windowMid) / (windowSamples + 1) / (windowSamples + 1)) - edge) / (1.0 - edge);
	}

	for (long iframe = 1; iframe <= numberOfFrames; iframe ++) {
		const double frameTime = Sampled_indexToX (thee.peek(), iframe);
		const long leftSample = Sampled_xToLowIndex (me, frameTime);
		const long rightSample = leftSample + 1;
		long firstSample = rightSample - halfWindowSamples;
		if (firstSample < 1)
			firstSample = 1;
		long lastSample = leftSample + halfWindowSamples;
		if (lastSample > my nx)
			lastSample = my nx;

		/*
		 * The intensity of the frame is the largest squared sample value inside it.
		 */
		double peakPower = 0.0;
		for (long isamp = firstSample; isamp <= lastSample; isamp ++) {
			const double amplitude = Sampled_getValueAtSample (me, isamp, Sound_LEVEL_MONO, 0);
			const double power = amplitude * amplitude;
			if (power > peakPower)
				peakPower = power;
		}
		if (peakPower == HUGE_VAL)
			Melder_throw (U"Sound contains infinities.");
		thy d_frames [iframe]. intensity = peakPower;
		if (peakPower == 0.0)
			continue;   // Burg cannot stand all zeroes

		/* Copy a pre-emphasized window to a frame. */
		for (long j = 1; j <= windowSamples; j ++)
			frame [j] = Sampled_getValueAtSample (me, firstSample + j - 1, Sound_LEVEL_MONO, 0) * window [j];

		const long analysedSamples = lastSample - firstSample + 1;
		switch (which) {
			case 1:
				burg (frame.peek(), analysedSamples, cof.peek(), numberOfPoles,
					& thy d_frames [iframe], 0.5 / my dx, safetyMargin);
				break;
			case 2:
				if (! splitLevinson (frame.peek(), analysedSamples, numberOfPoles, & thy d_frames [iframe], 0.5 / my dx)) {
					/* A failed frame is reported but does not abort the analysis. */
					Melder_clearError ();
					Melder_casual (U"(Sound_to_Formant:)"
						U" Analysis results of frame ", iframe,
						U" will be wrong."
					);
				}
				break;
			default:
				break;
		}
		Melder_progress ((double) iframe / (double) numberOfFrames, U"Formant analysis: frame ", iframe);
	}
	Formant_sort (thee.peek());
	return thee;
}
/*
 * Menu command "Paste": insert the contents of the sound clipboard into the edited sound,
 * directly after the end of the current selection, and select the pasted part.
 *
 * Warns and does nothing if the clipboard is empty; throws if the clipboard differs from
 * the edited sound in number of channels or in sampling period.
 * Afterwards the edited sound starts at time 0.
 */
static void menu_cb_Paste (SoundEditor me, EDITOR_ARGS_DIRECT) {
	Sound sound = (Sound) my data;
	long insertionSample = Sampled_xToLowIndex (sound, my d_endSelection);   // last old sample that stays in front
	const long oldLength = sound -> nx;
	double **oldSamples = sound -> z;

	if (! Sound_clipboard) {
		Melder_warning (U"Clipboard is empty; nothing pasted.");
		return;
	}
	if (Sound_clipboard -> ny != sound -> ny)
		Melder_throw (U"Cannot paste, because\n"
				U"the number of channels of the clipboard is not equal to\n"
				U"the number of channels of the edited sound.");
	if (Sound_clipboard -> dx != sound -> dx)
		Melder_throw (U"Cannot paste, because\n"
				U"the sampling frequency of the clipboard is not equal to\n"
				U"the sampling frequency of the edited sound.");

	if (insertionSample < 0)
		insertionSample = 0;
	if (insertionSample > oldLength)
		insertionSample = oldLength;
	const long newLength = oldLength + Sound_clipboard -> nx;

	/*
	 * Phase 1: everything that can fail, without touching the edited sound.
	 * Build the new sample matrix as: old head, clipboard, old tail.
	 */
	autoNUMmatrix <double> newSamples (1, sound -> ny, 1, newLength);
	for (long ichan = 1; ichan <= sound -> ny; ichan ++) {
		for (long i = 1; i <= insertionSample; i ++)
			newSamples [ichan] [i] = oldSamples [ichan] [i];
		for (long i = 1; i <= Sound_clipboard -> nx; i ++)
			newSamples [ichan] [insertionSample + i] = Sound_clipboard -> z [ichan] [i];
		for (long i = insertionSample + 1; i <= oldLength; i ++)
			newSamples [ichan] [Sound_clipboard -> nx + i] = oldSamples [ichan] [i];
	}
	Editor_save (me, U"Paste");

	/*
	 * Phase 2: replace the data of the edited sound; nothing below is expected to fail.
	 */
	NUMmatrix_free <double> (oldSamples, 1, 1);
	sound -> xmin = 0.0;
	sound -> xmax = newLength * sound -> dx;
	sound -> nx = newLength;
	sound -> x1 = 0.5 * sound -> dx;
	sound -> z = newSamples.transfer();

	/* Update the markers of the FunctionEditor, respecting the invariants: the pasted part becomes the selection. */
	my tmin = sound -> xmin;
	my tmax = sound -> xmax;
	my d_startSelection = insertionSample * sound -> dx;
	my d_endSelection = (insertionSample + Sound_clipboard -> nx) * sound -> dx;

	/* Force FunctionEditor to show changes. */
	Matrix_getWindowExtrema (sound, 1, sound -> nx, 1, sound -> ny, & my d_sound.minimum, & my d_sound.maximum);
	my v_reset_analysis ();
	FunctionEditor_ungroup (me);
	FunctionEditor_marksChanged (me, false);
	Editor_broadcastDataChanged (me);
}
/*
 * Resynthesize a Sound from the ComplexSpectrogram `me`, stretching its duration by `stretchFactor`.
 *
 * For every analysis frame a Spectrum is built from the stored power (my z) and phase (my phase),
 * with the phase advanced by the amount that corresponds to the time by which the frame is displaced
 * in the stretched sound. The spectrum is transformed to a short sound, of which the first part
 * (one stretched time step; for the last frame everything up to the end) is copied into the result.
 *
 * Throws if the first frequency of the spectrogram is not 0 Hz.
 */
autoSound ComplexSpectrogram_to_Sound (ComplexSpectrogram me, double stretchFactor) {
	try {
		/*
		 * Guess whether the original frames had an odd number of samples: they are taken to be odd
		 * if the phase of the last bin of the first frame is neither zero nor +/-pi,
		 * or if the last frequency lies clearly below the top of the frequency domain.
		 * NOTE(review): the second test compares a frequency difference with 0.25 * my dx (a time step);
		 * presumably my dy was intended - confirm before changing.
		 */
		double pi = atan2 (0.0, - 0.5);
		double samplingFrequency = 2.0 * my ymax;
		double lastFrequency = my y1 + (my ny - 1) * my dy, lastPhase = my phase[my ny][1];
		int originalNumberOfSamplesProbablyOdd = (lastPhase != 0.0 && lastPhase != pi && lastPhase != -pi) || my ymax - lastFrequency > 0.25 * my dx;
		if (my y1 != 0.0) {
			Melder_throw (U"A Fourier-transformable ComplexSpectrogram must have a first frequency of 0 Hz, not ", my y1, U" Hz.");
		}
		long nsamp_window = 2 * my ny - (originalNumberOfSamplesProbablyOdd ? 1 : 2 );
		long halfnsamp_window = nsamp_window / 2;
		autoSpectrum spectrum = Spectrum_create (my ymax, my ny);
		double newDuration = (my xmax - my xmin) * stretchFactor;
		autoSound thee = Sound_createSimple (1, newDuration, samplingFrequency); //TODO
		double thyStartTime = 0.0;   // start time of the current frame in the stretched sound; set at the first frame
		for (long iframe = 1; iframe <= my nx; iframe++) {
			// where the frame would start in the "original" (unstretched) sound:
			double tmid = Sampled_indexToX (me, iframe);
			long leftSample = Sampled_xToLowIndex (thee.get(), tmid);
			long rightSample = leftSample + 1;
			long startSample = rightSample - halfnsamp_window;
			double startTime = Sampled_indexToX (thee.get(), startSample);
			if (iframe == 1) {
				thyStartTime = Sampled_indexToX (thee.get(), startSample);
			}
			// where the frame starts in the new sound with stretch:
			long thyStartSample = Sampled_xToLowIndex (thee.get(), thyStartTime);
			double thyEndTime = thyStartTime + my dx * stretchFactor;
			long thyEndSample = Sampled_xToLowIndex (thee.get(), thyEndTime);
			long stretchedStepSizeSamples = thyEndSample - thyStartSample + 1;
			double extraTime = (thyStartTime - startTime);   // displacement of this frame caused by the stretch

			// build the complex spectrum of this frame, with the phases advanced over extraTime
			spectrum -> z[1][1] = sqrt (my z[1][iframe]);
			for (long ifreq = 2; ifreq <= my ny; ifreq++) {
				double f = my y1 + (ifreq - 1) * my dy;
				double a = sqrt (my z[ifreq][iframe]);
				double phi = my phase[ifreq][iframe], intPart;
				double extraPhase = 2.0 * pi * modf (extraTime * f, &intPart); // fractional part
				phi += extraPhase;
				spectrum -> z[1][ifreq] = a * cos (phi);
				spectrum -> z[2][ifreq] = a * sin (phi);
			}

			autoSound synthesis = Spectrum_to_Sound (spectrum.get());

			/*
			 * Where should the sound be placed?
			 * Normally one stretched step is filled; the last frame fills everything up to the end.
			 * Never copy more than the synthesized sound contains (guard against extreme stretches).
			 * (ppgb asked why the original rounded these values down; all of them are integers,
			 * so plain integer arithmetic gives the same result.)
			 */
			long thyEndSampleP = thyStartSample + stretchedStepSizeSamples - 1;
			if (iframe == my nx) {
				thyEndSampleP = thy nx;
			}
			const long lastSynthesizedSample = thyStartSample + synthesis -> nx - 1;
			if (thyEndSampleP > lastSynthesizedSample) {
				thyEndSampleP = lastSynthesizedSample;
			}
			/*
			 * Stay inside the target sound: the first frame can start before sample 1,
			 * and a frame can extend beyond the last sample.
			 */
			const long firstTargetSample = thyStartSample < 1 ? 1 : thyStartSample;
			const long lastTargetSample = thyEndSampleP > thy nx ? thy nx : thyEndSampleP;
			for (long j = firstTargetSample; j <= lastTargetSample; j++) {
				thy z[1][j] = synthesis -> z[1][j - thyStartSample + 1];
			}
			thyStartTime += my dx * stretchFactor;
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no Sound created.");
	}
}