Ejemplo n.º 1
0
/*
 * Ruby-callable wrapper around mel().
 *
 * self: receiver of the Ruby call; not used.
 * m:    Ruby numeric, converted to a C double with NUM2DBL.
 *
 * Returns the value produced by mel() wrapped in a new Ruby Float.
 */
static VALUE t_to_mel(VALUE self, VALUE m) {
  const double input = NUM2DBL(m);
  const double converted = mel(input);

  return rb_float_new(converted);
}
Ejemplo n.º 2
0
// Compute 32 MFCC-style coefficients for one frame of audio.
// http://de.wikipedia.org/wiki/MFCC
//
// self: extractor supplying frame_length, sample_rate, min_freq, max_freq and
//       the FFT/DCT work buffers under self->priv. self->priv->fft.freqdata is
//       used as scratch space and is overwritten.
// in:   input samples; in[0 .. frame_length-1] are read.
// ret:  ret->mfcc[0 .. 31] are overwritten with the result.
//
// Pipeline: window -> real FFT -> squared magnitudes -> log10 of the
// normalised magnitude (floored at -500) -> average into 32 bins of equal
// width on the mel scale -> DCT.
//
// If frame_length is smaller than 2 the function returns without touching
// ret or the work buffers.
void
whs_extractor_process (WhsExtractor *self, const gfloat *in, WhsFeatureVector *ret)
{
  // Number of mel bins, and therefore of coefficients written to ret->mfcc.
  enum { N_MEL_BINS = 32 };

  // With fewer than two samples, frame_length / 2 is 0: the mel binning below
  // would divide by zero and g_memmove would be handed an out-of-range
  // element count. There is nothing meaningful to analyse, so bail out.
  if (self->frame_length < 2)
    return;

  gdouble *freqdata = self->priv->fft.freqdata;

  // Copy the frame into the FFT work buffer, multiplying by the window table
  // (self->priv->cos presumably holds Hamming window coefficients -- confirm
  // where it is initialised).
  for (gint i = 0; i < self->frame_length; i++) {
    freqdata[i] = in[i] * self->priv->cos[i];
  }

  // Take FFT (in place)
  rdft (self->frame_length, 1, freqdata, self->priv->fft.ip, self->priv->fft.w);

  // Replace the packed spectrum with squared magnitudes. Slots 0 and 1 each
  // hold a single real value (slot 1 belongs to the Nyquist frequency, see
  // below); every following pair (i, i+1) is one complex bin and is
  // compacted into slot i/2 + 1. Writes never overtake reads because
  // i/2 + 1 <= i for i >= 2.
  freqdata[0] = freqdata[0] * freqdata[0];
  freqdata[1] = freqdata[1] * freqdata[1];
  for (guint i = 2; i < self->frame_length; i += 2) {
    freqdata[i / 2 + 1] = freqdata[i] * freqdata[i] + freqdata[i + 1] * freqdata[i + 1];
  }

  // Move freqdata[1] to the end, it's for the nyquist frequency!
  // Afterwards freqdata[0 .. frame_length/2] is ordered by frequency.
  gdouble nyquist = freqdata[1];
  g_memmove (&freqdata[1], &freqdata[2], sizeof (gdouble) * (self->frame_length / 2 - 1));
  freqdata[self->frame_length / 2] = nyquist;

  // Take logarithms of the magnitude normalised by the frame length.
  // The normaliser is computed in floating point so that a large
  // frame_length cannot overflow an integer multiplication.
  const gdouble norm = (gdouble) self->frame_length * (gdouble) self->frame_length;

  for (gint i = 0; i < self->frame_length / 2 + 1; i++) {
    if (freqdata[i] != 0.0) {
      // CLAMP is a macro that evaluates its argument more than once, so
      // compute the logarithm a single time first.
      const gdouble level = log10 (sqrt (freqdata[i] / norm));
      freqdata[i] = CLAMP (level, -500.0, G_MAXDOUBLE);
    } else {
      // log10 (0) is -infinity; use the floor value directly.
      freqdata[i] = -500.0;
    }
  }

  // Convert to mel spectrum
  gdouble bins[N_MEL_BINS];

  // Mel range covered by the bins. When min_freq / max_freq are set (> 0)
  // they are widened by one FFT bin width (sample_rate / frame_length) and
  // clamped to [0, sample_rate / 2]; otherwise the range is 0 .. mel (Nyquist).
  // The limits and the step are stored as integers, so bin edges fall on
  // whole mel values.
  gint start_m = (self->min_freq > 0) ? mel (CLAMP (self->min_freq - (self->sample_rate / self->frame_length), 0, self->sample_rate / 2)) : 0;
  gint stop_m = (self->max_freq > 0) ? mel (CLAMP (self->max_freq + (self->sample_rate / self->frame_length), 0, self->sample_rate / 2)) : mel (self->sample_rate / 2);
  gint step = (stop_m - start_m) / N_MEL_BINS;

  // NOTE(review): the first bin always starts at spectrum index 0, so any
  // spectrum entries below start_m are averaged into bin 0 rather than
  // skipped -- confirm this is intended.
  gint first = 0; // spectrum index at which the current bin starts
  gint width = 0; // number of spectrum entries consumed by the current bin
  gint bin;

  for (bin = 0; bin < N_MEL_BINS; bin++, first += width) {
    bins[bin] = 0.0;

    // Fill bin: consume consecutive spectrum entries while their mel value
    // does not exceed this bin's upper edge.
    for (width = 0;
         (first + width) <= self->frame_length / 2
         && mel (((first + width) * (self->sample_rate / 2)) / (self->frame_length / 2)) <= start_m + step * (bin + 1);
         width++) {
      bins[bin] += freqdata[first + width];
    }

    // Normalize bin (an empty bin stays at 0.0)
    if (width != 0)
      bins[bin] /= width;
  }

  // Calculate DCT (in place over the mel bins)
  ddct (N_MEL_BINS, -1, bins, self->priv->dct.ip, self->priv->dct.w);

  for (gint k = 0; k < N_MEL_BINS; k++)
    ret->mfcc[k] = bins[k];
}