fextract_t *fextract_pitch_energy_create(int frame_len, char *m_e_params) { fextract_t *fex; dsp_fextract_t *mfcc; fex = (fextract_t *) rs_malloc(sizeof(fextract_t),"emotion feature extraction data"); fex->n_features = V2_N_FEATURES; /* Abtastrate etc. ... */ fex->samplerate = SAMPLERATE; fex->frame_len = frame_len; /* global frame length (in frames) */ fex->pitch = pitch_create(AC_GAUSS); fex->frame_shift = fex->frame_len - ((fex->pitch->method == AC_GAUSS? 2 : 1 ) * fex->pitch->periodsPerWindow / fex->pitch->minimumPitch - fex->pitch->dt) * fex->samplerate ; /* global shift (in frames) */ fex->hnr = NULL; fex->vq = NULL; /* MFCCs ... */ mfcc = (dsp_fextract_t *) rs_malloc(sizeof(dsp_fextract_t), "feature extraction data"); mfcc->type = dsp_fextype_MFCC; mfcc->version = DSP_MK_VERSION(1, 4); if (!dsp_mfcc_create(mfcc, m_e_params)) { rs_free(mfcc); mfcc = NULL; rs_warning("Could not initialize MFCC configuration!"); } fex->mfcc=mfcc; return(fex); }
void EmoVoicePitch::transform_enter (ssi_stream_t &stream_in, ssi_stream_t &stream_out, ssi_size_t xtra_stream_in_num, ssi_stream_t xtra_stream_in[]) { if (!_cfg) { _cfg = pitch_create (ssi_cast (pitch_method_t, _options.method)); _cfg->minimumPitch = ssi_cast (mx_real_t, _options.minfreq); _cfg->maximumPitch = ssi_cast (mx_real_t, _options.maxfreq); } }
void EmoVoicePitch::transform (ITransformer::info info, ssi_stream_t &stream_in, ssi_stream_t &stream_out, ssi_size_t xtra_stream_in_num, ssi_stream_t xtra_stream_in[]) { ssi_size_t sample_number = stream_in.num; short *srcptr = ssi_pcast (short, stream_in.ptr); mx_real_t *dstptr = ssi_pcast (mx_real_t, stream_out.ptr); mx_real_t *p = pitch_calc (_cfg, srcptr, sample_number); memcpy (dstptr, p, _cfg->nframes * sizeof (mx_real_t)); pitch_destroy (_cfg); _cfg = pitch_create (ssi_cast (pitch_method_t, _options.method)); _cfg->minimumPitch = ssi_cast (mx_real_t, _options.minfreq); _cfg->maximumPitch = ssi_cast (mx_real_t, _options.maxfreq); free (p); }
ssi_size_t EmoVoicePitch::getSampleNumberOut (ssi_size_t sample_number_in) { // check if we already know the result if (sample_number_in == _sample_number_in) { return _sample_number_out; } if (!_cfg) { _cfg = pitch_create (ssi_cast (pitch_method_t, _options.method)); _cfg->minimumPitch = ssi_cast (mx_real_t, _options.minfreq); _cfg->maximumPitch = ssi_cast (mx_real_t, _options.maxfreq); } // calculate new sample number pitch_method_t method = _cfg->method; mx_real_t minimumPitch = _cfg->minimumPitch; mx_real_t dt = _cfg->dt; int periodsPerWindow = _cfg->periodsPerWindow; if (method == AC_GAUSS) periodsPerWindow *= 2; mx_real_t x1 = (mx_real_t) 0.5/SAMPLERATE; mx_real_t dt_window = periodsPerWindow / minimumPitch; int nFrames; mx_real_t t1; int result = fitInFrame (method >= FCC_NORMAL ? 1 / minimumPitch + dt_window : dt_window, dt, &nFrames, &t1, x1, sample_number_in); SSI_ASSERT (result); if (method >= FCC_NORMAL) nFrames = nFrames - 1; _sample_number_out = nFrames; return nFrames; }
bool EffectSBSMS::Process() { if(!bInit) { sbsms_init(4096); bInit = TRUE; } bool bGoodResult = true; //Iterate over each track //Track::All is needed because this effect needs to introduce silence in the group tracks to keep sync this->CopyInputTracks(Track::All); // Set up mOutputTracks. TrackListIterator iter(mOutputTracks); Track* t; mCurTrackNum = 0; double maxDuration = 0.0; if(rateStart == rateEnd) mTotalStretch = 1.0/rateStart; else mTotalStretch = 1.0/(rateEnd-rateStart)*log(rateEnd/rateStart); // Must sync if selection length will change bool mustSync = (mTotalStretch != 1.0); t = iter.First(); while (t != NULL) { if (t->GetKind() == Track::Label && (t->GetSelected() || (mustSync && t->IsSynchroSelected())) ) { if (!ProcessLabelTrack(t)) { bGoodResult = false; break; } } else if (t->GetKind() == Track::Wave && t->GetSelected() ) { WaveTrack* leftTrack = (WaveTrack*)t; //Get start and end times from track mCurT0 = leftTrack->GetStartTime(); mCurT1 = leftTrack->GetEndTime(); //Set the current bounds to whichever left marker is //greater and whichever right marker is less mCurT0 = wxMax(mT0, mCurT0); mCurT1 = wxMin(mT1, mCurT1); // Process only if the right marker is to the right of the left marker if (mCurT1 > mCurT0) { sampleCount start; sampleCount end; start = leftTrack->TimeToLongSamples(mCurT0); end = leftTrack->TimeToLongSamples(mCurT1); WaveTrack* rightTrack = NULL; if (leftTrack->GetLinked()) { double t; rightTrack = (WaveTrack*)(iter.Next()); //Adjust bounds by the right tracks markers t = rightTrack->GetStartTime(); t = wxMax(mT0, t); mCurT0 = wxMin(mCurT0, t); t = rightTrack->GetEndTime(); t = wxMin(mT1, t); mCurT1 = wxMax(mCurT1, t); //Transform the marker timepoints to samples start = leftTrack->TimeToLongSamples(mCurT0); end = leftTrack->TimeToLongSamples(mCurT1); mCurTrackNum++; // Increment for rightTrack, too. } sampleCount trackEnd = leftTrack->TimeToLongSamples(leftTrack->GetEndTime()); // SBSMS has a fixed sample rate - we just convert to its sample rate and then convert back float srIn = leftTrack->GetRate(); float srSBSMS = 44100.0; // the resampler needs a callback to supply its samples resampleBuf rb; sampleCount maxBlockSize = leftTrack->GetMaxBlockSize(); rb.block = maxBlockSize; rb.buf = (audio*)calloc(rb.block,sizeof(audio)); rb.leftTrack = leftTrack; rb.rightTrack = rightTrack?rightTrack:leftTrack; rb.leftBuffer = (float*)calloc(maxBlockSize,sizeof(float)); rb.rightBuffer = (float*)calloc(maxBlockSize,sizeof(float)); rb.offset = start; rb.end = trackEnd; rb.ratio = srSBSMS/srIn; rb.resampler = new Resampler(resampleCB, &rb); // Samples in selection sampleCount samplesIn = end-start; // Samples for SBSMS to process after resampling sampleCount samplesToProcess = (sampleCount) ((real)samplesIn*(srSBSMS/srIn)); // Samples in output after resampling back sampleCount samplesToGenerate = (sampleCount) ((real)samplesToProcess * mTotalStretch); sampleCount samplesOut = (sampleCount) ((real)samplesIn * mTotalStretch); double duration = (mCurT1-mCurT0) * mTotalStretch; if(duration > maxDuration) maxDuration = duration; TimeWarper *warper = NULL; if (rateStart == rateEnd) { warper = new LinearTimeWarper(mCurT0, mCurT0, mCurT1, mCurT0+maxDuration); } else { warper = new LogarithmicTimeWarper(mCurT0, mCurT1, rateStart, rateEnd); } SetTimeWarper(warper); sbsmsInfo si; si.rs = rb.resampler; si.samplesToProcess = samplesToProcess; si.samplesToGenerate = samplesToGenerate; si.stretch0 = rateStart; si.stretch1 = rateEnd; si.ratio0 = pitchStart; si.ratio1 = pitchEnd; rb.sbsmser = sbsms_create(&samplesCB,&stretchCB,&ratioCB,rightTrack?2:1,quality,bPreAnalyze,true); rb.pitch = pitch_create(rb.sbsmser,&si,srIn/srSBSMS); rb.outputLeftTrack = mFactory->NewWaveTrack(leftTrack->GetSampleFormat(), leftTrack->GetRate()); if(rightTrack) rb.outputRightTrack = mFactory->NewWaveTrack(rightTrack->GetSampleFormat(), rightTrack->GetRate()); sampleCount blockSize = SBSMS_FRAME_SIZE[quality]; rb.outBuf = (audio*)calloc(blockSize,sizeof(audio)); rb.outputLeftBuffer = (float*)calloc(blockSize*2,sizeof(float)); if(rightTrack) rb.outputRightBuffer = (float*)calloc(blockSize*2,sizeof(float)); long pos = 0; long outputCount = -1; // pre analysis real fracPre = 0.0f; if(bPreAnalyze) { fracPre = 0.05f; resampleBuf rbPre; rbPre.block = maxBlockSize; rbPre.buf = (audio*)calloc(rb.block,sizeof(audio)); rbPre.leftTrack = leftTrack; rbPre.rightTrack = rightTrack?rightTrack:leftTrack; rbPre.leftBuffer = (float*)calloc(maxBlockSize,sizeof(float)); rbPre.rightBuffer = (float*)calloc(maxBlockSize,sizeof(float)); rbPre.offset = start; rbPre.end = end; rbPre.ratio = srSBSMS/srIn; rbPre.resampler = new Resampler(resampleCB, &rbPre); si.rs = rbPre.resampler; long pos = 0; long lastPos = 0; long ret = 0; while(lastPos<samplesToProcess) { ret = sbsms_pre_analyze(&samplesCB,&si,rb.sbsmser); lastPos = pos; pos += ret; real completion = (real)lastPos/(real)samplesToProcess; if (TrackProgress(0,fracPre*completion)) return false; } sbsms_pre_analyze_complete(rb.sbsmser); sbsms_reset(rb.sbsmser); si.rs = rb.resampler; } // process while(pos<samplesOut && outputCount) { long frames; if(pos+blockSize>samplesOut) { frames = samplesOut - pos; } else { frames = blockSize; } outputCount = pitch_process(rb.outBuf, frames, rb.pitch); for(int i = 0; i < outputCount; i++) { rb.outputLeftBuffer[i] = rb.outBuf[i][0]; if(rightTrack) rb.outputRightBuffer[i] = rb.outBuf[i][1]; } pos += outputCount; rb.outputLeftTrack->Append((samplePtr)rb.outputLeftBuffer, floatSample, outputCount); if(rightTrack) rb.outputRightTrack->Append((samplePtr)rb.outputRightBuffer, floatSample, outputCount); double frac = (double)pos/(double)samplesOut; int nWhichTrack = mCurTrackNum; if(rightTrack) { nWhichTrack = 2*(mCurTrackNum/2); if (frac < 0.5) frac *= 2.0; // Show twice as far for each track, because we're doing 2 at once. else { nWhichTrack++; frac -= 0.5; frac *= 2.0; // Show twice as far for each track, because we're doing 2 at once. } } if (TrackProgress(nWhichTrack, fracPre + (1.0-fracPre)*frac)) return false; } rb.outputLeftTrack->Flush(); if(rightTrack) rb.outputRightTrack->Flush(); leftTrack->ClearAndPaste(mCurT0, mCurT1, rb.outputLeftTrack, true, false, GetTimeWarper()); if(rightTrack) { rightTrack->ClearAndPaste(mCurT0, mCurT1, rb.outputRightTrack, true, false, GetTimeWarper()); } } mCurTrackNum++; } else if (mustSync && t->IsSynchroSelected()) { t->SyncAdjust(mCurT1, mCurT0 + (mCurT1 - mCurT0) * mTotalStretch); } //Iterate to the next track t = iter.Next(); } if (bGoodResult) ReplaceProcessedTracks(bGoodResult); // Update selection mT0 = mCurT0; mT1 = mCurT0 + maxDuration; return bGoodResult; }
pitch_t *hnr_create() { pitch_t *hnr=pitch_create(AC_HANNING); hnr=pitch_hnr_configure(hnr); return hnr; }