Example #1
/*
 * Allocates a fextract_t via rs_malloc() and fills in its feature count,
 * sample rate, frame length, pitch configuration and frame shift, then
 * allocates and initializes an MFCC extractor (dsp_fextract_t).
 *
 * frame_len   - stored unchanged in fex->frame_len
 * m_e_params  - passed through unchanged to dsp_mfcc_create()
 *
 * NOTE(review): this excerpt ends inside the MFCC failure branch; the rest
 * of the function, including its return statement, is not visible here.
 */
fextract_t *fextract_pitch_energy_create(int frame_len, char *m_e_params) {
	fextract_t *fex;
	dsp_fextract_t *mfcc;

	fex = (fextract_t *) rs_malloc(sizeof(fextract_t),"emotion feature extraction data");
	fex->n_features = V2_N_FEATURES;

	/* Sampling rate etc. ... */
	fex->samplerate = SAMPLERATE;
	fex->frame_len = frame_len; /* global frame length (in frames) */
	fex->pitch = pitch_create(AC_GAUSS);
	/*
	 * frame_shift = frame_len - (window - dt) * samplerate, where
	 * window = periodsPerWindow / minimumPitch, doubled when the pitch
	 * method is AC_GAUSS. All pitch fields are read from the configuration
	 * just returned by pitch_create().
	 */
	fex->frame_shift = fex->frame_len - ((fex->pitch->method == AC_GAUSS? 2 : 1 ) * fex->pitch->periodsPerWindow / fex->pitch->minimumPitch - fex->pitch->dt) * fex->samplerate ; /* global shift (in frames) */

	/* HNR and voice-quality members start out unset. */
	fex->hnr = NULL;
	fex->vq = NULL;
	/* MFCCs ... */
	mfcc = (dsp_fextract_t *) rs_malloc(sizeof(dsp_fextract_t), "feature extraction data");
	mfcc->type = dsp_fextype_MFCC;
	mfcc->version = DSP_MK_VERSION(1, 4);
	/*
	 * On failure only the local pointer is cleared and a warning is issued.
	 * NOTE(review): the block allocated above is not released in the visible
	 * lines - confirm whether dsp_mfcc_create() frees it on failure.
	 */
	if (!dsp_mfcc_create(mfcc, m_e_params)) {
		mfcc = NULL;
		rs_warning("Could not initialize MFCC configuration!");
Example #2
// Prepares the pitch configuration before transformation starts.
//
// If no configuration exists yet (_cfg is null), one is created with
// pitch_create() using the method from _options, and its minimum and maximum
// pitch are overwritten with _options.minfreq and _options.maxfreq.
// None of the stream parameters are read in the visible lines.
//
// NOTE(review): this excerpt ends inside the `if (!_cfg)` block; any
// remaining statements and the closing braces are not visible here.
void EmoVoicePitch::transform_enter (ssi_stream_t &stream_in,
	ssi_stream_t &stream_out,
	ssi_size_t xtra_stream_in_num,
	ssi_stream_t xtra_stream_in[]) {

	if (!_cfg) {
		_cfg = pitch_create (ssi_cast (pitch_method_t, _options.method));
		_cfg->minimumPitch = ssi_cast (mx_real_t, _options.minfreq);
		_cfg->maximumPitch = ssi_cast (mx_real_t, _options.maxfreq);
Example #3
// Runs pitch extraction on one input chunk and writes the result to the
// output stream.
//
// stream_in   - its buffer is reinterpreted as `short` samples; stream_in.num
//               is passed to pitch_calc() as the sample count
// stream_out  - its buffer is reinterpreted as mx_real_t and receives
//               _cfg->nframes values
// info, xtra_stream_in_num, xtra_stream_in - not read in the visible lines
//
// NOTE(review): the closing brace of this function is not visible in this
// excerpt.
void EmoVoicePitch::transform (ITransformer::info info,
	ssi_stream_t &stream_in,
	ssi_stream_t &stream_out,
	ssi_size_t xtra_stream_in_num,
	ssi_stream_t xtra_stream_in[]) {

	ssi_size_t sample_number = stream_in.num;

	short *srcptr = ssi_pcast (short, stream_in.ptr);
	mx_real_t *dstptr = ssi_pcast (mx_real_t, stream_out.ptr);

	// NOTE(review): _cfg is dereferenced without a null check; presumably
	// transform_enter() has created it beforehand - confirm.
	mx_real_t *p = pitch_calc (_cfg, srcptr, sample_number);
	// The copy length comes from _cfg->nframes (presumably filled in by
	// pitch_calc - TODO confirm); nothing here checks it against the
	// capacity of stream_out.
	memcpy (dstptr, p, _cfg->nframes * sizeof (mx_real_t));

	// The configuration is thrown away and rebuilt from _options after every
	// call, so the next call starts from a freshly created pitch_t.
	pitch_destroy (_cfg);
	_cfg = pitch_create (ssi_cast (pitch_method_t, _options.method));
	_cfg->minimumPitch = ssi_cast (mx_real_t, _options.minfreq);
	_cfg->maximumPitch = ssi_cast (mx_real_t, _options.maxfreq);

	// The buffer returned by pitch_calc is released by the caller with free().
	free (p);
Example #4
// Returns the number of output frames for a chunk of `sample_number_in`
// input samples.
//
// When `sample_number_in` equals the cached _sample_number_in, the cached
// _sample_number_out is returned. Otherwise the frame count is obtained from
// fitInFrame() using the current pitch configuration, stored in
// _sample_number_out and returned.
//
// NOTE(review): in this excerpt the closing braces of both `if` blocks and of
// the function itself are missing, and _sample_number_in is never updated in
// the visible lines - confirm against the full file.
ssi_size_t EmoVoicePitch::getSampleNumberOut (ssi_size_t sample_number_in) {

	// check if we already know the result
	if (sample_number_in == _sample_number_in) {
		return _sample_number_out;

	// Create the pitch configuration on demand, using the same settings as
	// the other methods of this class.
	if (!_cfg) {
		_cfg = pitch_create (ssi_cast (pitch_method_t, _options.method));
		_cfg->minimumPitch = ssi_cast (mx_real_t, _options.minfreq);
		_cfg->maximumPitch = ssi_cast (mx_real_t, _options.maxfreq);

	// calculate new sample number
	pitch_method_t method = _cfg->method;
	mx_real_t minimumPitch = _cfg->minimumPitch;
	mx_real_t dt = _cfg->dt;
	int periodsPerWindow = _cfg->periodsPerWindow;
	// AC_GAUSS uses twice as many periods per window.
	if (method == AC_GAUSS) 
		 periodsPerWindow *= 2;
	// x1 is half of one sample period (0.5 / SAMPLERATE).
	mx_real_t x1 = (mx_real_t) 0.5/SAMPLERATE;
	// Window duration: periods per window divided by the minimum pitch.
	mx_real_t dt_window = periodsPerWindow / minimumPitch;

	int nFrames;
	mx_real_t t1;

	// For methods >= FCC_NORMAL the window handed to fitInFrame is extended by
	// one period of the minimum pitch (1 / minimumPitch). nFrames and t1 are
	// output arguments; t1 is not used afterwards in the visible lines.
	int result = fitInFrame (method >= FCC_NORMAL ? 1 / minimumPitch + dt_window : dt_window, dt, &nFrames, &t1, x1, sample_number_in);
	// A zero result from fitInFrame is treated as a failed assertion.
	SSI_ASSERT (result);

	// Methods >= FCC_NORMAL yield one frame fewer.
	if (method >= FCC_NORMAL)
		nFrames = nFrames - 1;

	_sample_number_out = nFrames;

	return nFrames;
Example #5
// Applies the SBSMS time/pitch effect to the output copies of the input
// tracks.
//
// Visible flow:
//   1. Copy all input tracks to mOutputTracks and derive mTotalStretch from
//      rateStart / rateEnd.
//   2. Walk the tracks:
//        - label tracks (selected, or sync-selected when the length changes)
//          go through ProcessLabelTrack();
//        - selected wave tracks (plus a linked right channel, if any) are
//          resampled to a fixed 44100 Hz rate, run through SBSMS, appended to
//          new output tracks and pasted back with ClearAndPaste();
//        - other sync-selected tracks are adjusted with SyncAdjust() when
//          the length changes.
//   3. Update the selection bounds mT0 / mT1.
//
// Returns bGoodResult, which is set to false when ProcessLabelTrack() fails.
// Returns false immediately when TrackProgress() returns true (presumably a
// user cancel - confirm).
//
// NOTE(review): this excerpt is truncated. Many closing braces, several
// `else` lines and some call continuations are missing, so the nesting shown
// by the indentation cannot be verified from the visible text. Cleanup of the
// calloc/new allocations made below is also not visible here.
bool EffectSBSMS::Process()
   if(!bInit) {
      bInit = TRUE;
   bool bGoodResult = true;
   //Iterate over each track
   //Track::All is needed because this effect needs to introduce silence in the group tracks to keep sync
   this->CopyInputTracks(Track::All); // Set up mOutputTracks.
   TrackListIterator iter(mOutputTracks);
   Track* t;
   mCurTrackNum = 0;

   // Longest stretched duration seen so far; used for the new selection end.
   double maxDuration = 0.0;

   // Constant rate: stretch is 1/rate. Otherwise the stretch is
   // log(rateEnd/rateStart) / (rateEnd - rateStart).
   // NOTE(review): both assignments appear back to back in this excerpt; the
   // second presumably belonged to an `else` branch - confirm.
   if(rateStart == rateEnd)
      mTotalStretch = 1.0/rateStart;
      mTotalStretch = 1.0/(rateEnd-rateStart)*log(rateEnd/rateStart);

   // Must sync if selection length will change
   bool mustSync = (mTotalStretch != 1.0);

   t = iter.First();
   while (t != NULL) {
      if (t->GetKind() == Track::Label && 
            (t->GetSelected() || (mustSync && t->IsSynchroSelected())) )
         if (!ProcessLabelTrack(t)) {
            bGoodResult = false;
      else if (t->GetKind() == Track::Wave && t->GetSelected() )
         WaveTrack* leftTrack = (WaveTrack*)t;

         //Get start and end times from track
         mCurT0 = leftTrack->GetStartTime();
         mCurT1 = leftTrack->GetEndTime();
         //Set the current bounds to whichever left marker is
         //greater and whichever right marker is less
         mCurT0 = wxMax(mT0, mCurT0);
         mCurT1 = wxMin(mT1, mCurT1);
         // Process only if the right marker is to the right of the left marker
         if (mCurT1 > mCurT0) {
            sampleCount start;
            sampleCount end;
            start = leftTrack->TimeToLongSamples(mCurT0);
            end = leftTrack->TimeToLongSamples(mCurT1);
            WaveTrack* rightTrack = NULL;
            // A linked track is followed by its right channel: fetch it from
            // the iterator and widen the bounds to cover both channels.
            if (leftTrack->GetLinked()) {
               double t;
               rightTrack = (WaveTrack*)(iter.Next());
               //Adjust bounds by the right tracks markers
               t = rightTrack->GetStartTime();
               t = wxMax(mT0, t);
               mCurT0 = wxMin(mCurT0, t);
               t = rightTrack->GetEndTime();
               t = wxMin(mT1, t);
               mCurT1 = wxMax(mCurT1, t);
               //Transform the marker timepoints to samples
               start = leftTrack->TimeToLongSamples(mCurT0);
               end = leftTrack->TimeToLongSamples(mCurT1);
               mCurTrackNum++; // Increment for rightTrack, too.	
            sampleCount trackEnd = leftTrack->TimeToLongSamples(leftTrack->GetEndTime());

            // SBSMS has a fixed sample rate - we just convert to its sample rate and then convert back
            float srIn = leftTrack->GetRate();
            float srSBSMS = 44100.0;
            // the resampler needs a callback to supply its samples
            // rb carries the state handed to resampleCB: source tracks,
            // scratch buffers, the read range and the resampling ratio. For a
            // mono track the left track is used for both channels.
            resampleBuf rb;
            sampleCount maxBlockSize = leftTrack->GetMaxBlockSize();
            rb.block = maxBlockSize;
            rb.buf = (audio*)calloc(rb.block,sizeof(audio));
            rb.leftTrack = leftTrack;
            rb.rightTrack = rightTrack?rightTrack:leftTrack;
            rb.leftBuffer = (float*)calloc(maxBlockSize,sizeof(float));
            rb.rightBuffer = (float*)calloc(maxBlockSize,sizeof(float));
            rb.offset = start;
            // The main pass may read up to the end of the track, not just up
            // to the end of the selection (compare rbPre.end below).
            rb.end = trackEnd;
            rb.ratio = srSBSMS/srIn;
            rb.resampler = new Resampler(resampleCB, &rb);
            // Samples in selection
            sampleCount samplesIn = end-start;
            // Samples for SBSMS to process after resampling
            sampleCount samplesToProcess = (sampleCount) ((real)samplesIn*(srSBSMS/srIn));
            // Samples in output after resampling back
            sampleCount samplesToGenerate = (sampleCount) ((real)samplesToProcess * mTotalStretch);
            sampleCount samplesOut = (sampleCount) ((real)samplesIn * mTotalStretch);
            double duration =  (mCurT1-mCurT0) * mTotalStretch;

            if(duration > maxDuration)
               maxDuration = duration;

            // NOTE(review): `warper` is assigned here but not referenced again
            // in the visible lines; the ClearAndPaste calls below pass
            // GetTimeWarper() instead - confirm ownership and use.
            TimeWarper *warper = NULL;
            if (rateStart == rateEnd)
               warper = new LinearTimeWarper(mCurT0, mCurT0,
                                             mCurT1, mCurT0+maxDuration);
            } else
               warper = new LogarithmicTimeWarper(mCurT0, mCurT1,
                                                  rateStart, rateEnd);
            // si is the parameter bundle passed to the SBSMS callbacks.
            sbsmsInfo si;
            si.rs = rb.resampler;
            si.samplesToProcess = samplesToProcess;
            si.samplesToGenerate = samplesToGenerate;
            si.stretch0 = rateStart;
            si.stretch1 = rateEnd;
            si.ratio0 = pitchStart;
            si.ratio1 = pitchEnd;
            // Two channels when a right track exists, otherwise one.
            rb.sbsmser = sbsms_create(&samplesCB,&stretchCB,&ratioCB,rightTrack?2:1,quality,bPreAnalyze,true);
            // The pitch stage receives srIn/srSBSMS, the inverse of rb.ratio.
            rb.pitch = pitch_create(rb.sbsmser,&si,srIn/srSBSMS);
            // NOTE(review): the two NewWaveTrack calls below are cut off in
            // this excerpt (their remaining arguments are missing).
            rb.outputLeftTrack = mFactory->NewWaveTrack(leftTrack->GetSampleFormat(),
               rb.outputRightTrack = mFactory->NewWaveTrack(rightTrack->GetSampleFormat(),
            sampleCount blockSize = SBSMS_FRAME_SIZE[quality];
            rb.outBuf = (audio*)calloc(blockSize,sizeof(audio));
            rb.outputLeftBuffer = (float*)calloc(blockSize*2,sizeof(float));
               rb.outputRightBuffer = (float*)calloc(blockSize*2,sizeof(float));
            long pos = 0;
            long outputCount = -1;
            // pre analysis
            // fracPre is the share of the progress range given to
            // pre-analysis: 0 when disabled, 0.05 when enabled.
            real fracPre = 0.0f;
            if(bPreAnalyze) {
               fracPre = 0.05f;
               // A second resampler state, limited to the selection
               // (rbPre.end = end), feeds the pre-analysis pass.
               resampleBuf rbPre;
               rbPre.block = maxBlockSize;
               rbPre.buf = (audio*)calloc(rb.block,sizeof(audio));
               rbPre.leftTrack = leftTrack;
               rbPre.rightTrack = rightTrack?rightTrack:leftTrack;
               rbPre.leftBuffer = (float*)calloc(maxBlockSize,sizeof(float));
               rbPre.rightBuffer = (float*)calloc(maxBlockSize,sizeof(float));
               rbPre.offset = start;
               rbPre.end = end;
               rbPre.ratio = srSBSMS/srIn;
               rbPre.resampler = new Resampler(resampleCB, &rbPre);
               si.rs = rbPre.resampler;
               // This `pos` shadows the outer `pos` declared above for the
               // duration of the pre-analysis block.
               long pos = 0;
               long lastPos = 0;
               long ret = 0;
               while(lastPos<samplesToProcess) {
                  ret = sbsms_pre_analyze(&samplesCB,&si,rb.sbsmser);
                  lastPos = pos;
                  pos += ret;
                  real completion = (real)lastPos/(real)samplesToProcess;
                  if (TrackProgress(0,fracPre*completion))
                     return false;
               // Switch the callbacks back to the main resampler.
               si.rs = rb.resampler;
            // process
            // Runs until samplesOut samples were produced or pitch_process
            // returns 0 (outputCount starts at -1 so the loop is entered).
            while(pos<samplesOut && outputCount) {
               long frames;
               // Request a full block, or only the remainder on the last pass.
               if(pos+blockSize>samplesOut) {
                  frames = samplesOut - pos;
               } else {
                  frames = blockSize;
               outputCount = pitch_process(rb.outBuf, frames, rb.pitch);
               // Split the interleaved output: index [0] goes to the left
               // buffer, index [1] to the right buffer.
               for(int i = 0; i < outputCount; i++) {
                  rb.outputLeftBuffer[i] = rb.outBuf[i][0];
                     rb.outputRightBuffer[i] = rb.outBuf[i][1];
               pos += outputCount;
               rb.outputLeftTrack->Append((samplePtr)rb.outputLeftBuffer, floatSample, outputCount);
                  rb.outputRightTrack->Append((samplePtr)rb.outputRightBuffer, floatSample, outputCount);
               double frac = (double)pos/(double)samplesOut;
               int nWhichTrack = mCurTrackNum;
               if(rightTrack) {
                  nWhichTrack = 2*(mCurTrackNum/2);
                  if (frac < 0.5)
                     frac *= 2.0; // Show twice as far for each track, because we're doing 2 at once. 
                  else {
                     frac -= 0.5;
                     frac *= 2.0; // Show twice as far for each track, because we're doing 2 at once. 
               // Progress is scaled into the range left over after
               // pre-analysis, i.e. [fracPre, 1].
               if (TrackProgress(nWhichTrack, fracPre + (1.0-fracPre)*frac))
                  return false;
            // Replace the selected region with the processed audio.
            leftTrack->ClearAndPaste(mCurT0, mCurT1, rb.outputLeftTrack,
                  true, false, GetTimeWarper());

            if(rightTrack) {
               rightTrack->ClearAndPaste(mCurT0, mCurT1, rb.outputRightTrack,
                     true, false, GetTimeWarper());
      // Any other sync-selected track is adjusted to the new length
      // (mCurT1 - mCurT0) * mTotalStretch.
      else if (mustSync && t->IsSynchroSelected())
         t->SyncAdjust(mCurT1, mCurT0 + (mCurT1 - mCurT0) * mTotalStretch);
      //Iterate to the next track
      t = iter.Next();
   // NOTE(review): the statement guarded by this `if` is missing from the
   // excerpt; as written, the guard would attach to `mT0 = mCurT0;` below.
   if (bGoodResult)

   // Update selection
   mT0 = mCurT0;
   mT1 = mCurT0 + maxDuration;
   return bGoodResult;
Example #6
/*
 * Creates the pitch configuration used for HNR by calling
 * pitch_create(AC_HANNING) and returns the resulting pointer unchanged.
 *
 * NOTE(review): the closing brace of this function is not visible in this
 * excerpt.
 */
pitch_t *hnr_create() {
    pitch_t *hnr=pitch_create(AC_HANNING);
    return hnr;