Example #1
0
void
TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
{
    // calculate autocorrelation function
    // then rcf
    // just hard code for now... don't really need separate functions to do this

    // make acf

    d_vec_t dfframe(dfframe_in);

    MathUtilities::adaptiveThreshold(dfframe);

    d_vec_t acf(dfframe.size());


    for (unsigned int lag=0; lag<dfframe.size(); lag++)
    {
        double sum = 0.;
        double tmp = 0.;

        for (unsigned int n=0; n<(dfframe.size()-lag); n++)
        {
            tmp = dfframe[n] * dfframe[n+lag];
            sum += tmp;
        }
        acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
    }

    // now apply comb filtering
    int numelem = 4;

    for (unsigned int i = 2;i < rcf.size();i++) // max beat period
    {
        for (int a = 1;a <= numelem;a++) // number of comb elements
        {
            for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
            {
                rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.);	// calculate value for comb filter row
            }
        }
    }

    // apply adaptive threshold to rcf
    MathUtilities::adaptiveThreshold(rcf);

    double rcfsum =0.;
    for (unsigned int i=0; i<rcf.size(); i++)
    {
        rcf[i] += EPS ;
        rcfsum += rcf[i];
    }

    // normalise rcf to sum to unity
    for (unsigned int i=0; i<rcf.size(); i++)
    {
        rcf[i] /= (rcfsum + EPS);
    }
}
Example #2
0
void
TempoTrackV2::normalise_vec(d_vec_t &df)
{
    // Scales df in place by 1 / (sum of its elements + EPS).
    // The EPS term keeps the divisor away from zero when the elements sum to 0.

    double total = 0.;
    for (unsigned int k = 0; k < df.size(); k++)
    {
        total += df[k];
    }

    const double denom = total + EPS;
    for (unsigned int k = 0; k < df.size(); k++)
    {
        df[k] /= denom;
    }
}
Example #3
0
double
TempoTrackV2::get_max_val(const d_vec_t &df)
{
    // Returns the largest element of df, floored at zero: the running
    // maximum starts at 0, so an empty vector or one with no positive
    // entries yields 0.

    double best = 0.;
    for (unsigned int k = 0; k < df.size(); k++)
    {
        const double candidate = df[k];
        if (candidate > best)
        {
            best = candidate;
        }
    }

    return best;
}
Example #4
0
int
TempoTrackV2::get_max_ind(const d_vec_t &df)
{
    // Returns the index of the first element of df that exceeds every
    // element before it and is greater than zero.  The running maximum
    // starts at 0, so an empty vector or one with no positive entries
    // yields index 0.

    int bestIndex = 0;
    double best = 0.;

    for (unsigned int k = 0; k < df.size(); k++)
    {
        const double candidate = df[k];
        if (candidate > best)
        {
            best = candidate;
            bestIndex = k;
        }
    }

    return bestIndex;
}
Example #5
0
double
DownBeat::measureSpecDiff(d_vec_t oldspec, d_vec_t newspec)
{
    // Jensen-Shannon divergence between two spectral frames.
    //
    // oldspec / newspec: the two spectra to compare.  Both are taken by value
    //                    because they are offset by EPS and normalised in
    //                    place below; the caller's vectors are not modified.
    // Returns the divergence accumulated over the compared bins (0 when
    // there are no bins to compare).

    // Only look at the first 512 bins of the spectrum, and never more than a
    // quarter of oldspec.
    unsigned int SPECSIZE = 512;
    if (SPECSIZE > oldspec.size()/4) {
        SPECSIZE = oldspec.size()/4;
    }
    // newspec is indexed over the same range, so a shorter newspec must
    // also bound it; otherwise the loops below would run past its end.
    if (SPECSIZE > newspec.size()) {
        SPECSIZE = newspec.size();
    }

    double SD = 0.;
    double sd1 = 0.;

    double sumnew = 0.;
    double sumold = 0.;

    // Offset every compared bin by EPS and total each spectrum.
    for (unsigned int i = 0;i < SPECSIZE;i++)
    {
        newspec[i] +=EPS;
        oldspec[i] +=EPS;

        sumnew+=newspec[i];
        sumold+=oldspec[i];
    }

    for (unsigned int i = 0;i < SPECSIZE;i++)
    {
        // Normalise so the compared bins of each spectrum sum to one.
        newspec[i] /= (sumnew);
        oldspec[i] /= (sumold);

        // If any spectral values are 0 (shouldn't be any, given the EPS
        // offset) set them to 1, which makes their x*log(x) term vanish.
        if (newspec[i] == 0)
        {
            newspec[i] = 1.;
        }

        if (oldspec[i] == 0)
        {
            oldspec[i] = 1.;
        }

        // Jensen-Shannon calculation: entropy of the mixture minus the mean
        // of the two individual entropies, accumulated bin by bin.
        sd1 = 0.5*oldspec[i] + 0.5*newspec[i];
        SD = SD + (-sd1*log(sd1)) + (0.5*(oldspec[i]*log(oldspec[i]))) + (0.5*(newspec[i]*log(newspec[i])));
    }

    return SD;
}
Example #6
0
void
DownBeat::findDownBeats(const float *audio,
                        size_t audioLength,
                        const d_vec_t &beats,
                        i_vec_t &downbeats)
{
    // Locate downbeats by cutting the input audio into beat-long segments,
    // taking the magnitude spectrum of each, and measuring the
    // Jensen-Shannon divergence between the spectra of consecutive beats.
    // (Original note: the audio frames are downsampled by a factor of 16,
    // fs ~= 2700Hz.)
    //
    // Implementation (mostly) follows:
    //  Davies and Plumbley, "A spectral difference approach to extracting
    //  downbeats in musical audio", EUSIPCO 2006, Florence, Italy
    //
    // audio, audioLength: samples to analyse and how many there are
    // beats:              beat positions; scaled by m_increment / m_factor
    //                     to obtain sample offsets into audio
    // downbeats:          receives (appended) indices into beats
    //
    // Side effect: m_beatsd is cleared and refilled with one divergence
    // value per beat transition.

    const size_t halfFrame = m_beatframesize / 2;

    d_vec_t newspec(halfFrame); // magnitude spectrum of current beat
    d_vec_t oldspec(halfFrame); // magnitude spectrum of previous beat

    m_beatsd.clear();

    if (audioLength == 0) return;

    for (size_t beat = 0; beat + 1 < beats.size(); ++beat) {

        // Work out the extents of this beat within the audio, clamped so
        // that the segment never reaches past the last sample and never
        // has negative length.

        size_t segStart = (beats[beat] * m_increment) / m_factor;
        size_t segEnd = (beats[beat+1] * m_increment) / m_factor;
        if (segEnd >= audioLength) segEnd = audioLength - 1;
        if (segEnd < segStart) segEnd = segStart;
        const size_t segLen = segEnd - segStart;

        // Copy the segment into the beat frame buffer through a Hanning
        // window sized to the beat extents rather than the frame size.
        // (Because the size varies, this is done by hand instead of using
        // the Window abstraction.)

        size_t j = 0;
        for (; j < segLen && j < m_beatframesize; ++j) {
            const double phase = TWO_PI * (double(j) / double(segLen));
            const double mul = 0.5 * (1.0 - cos(phase));
            m_beatframe[j] = audio[segStart + j] * mul;
        }

        // Zero whatever part of the frame lies beyond the beat

        for (j = segLen; j < m_beatframesize; ++j) {
            m_beatframe[j] = 0.0;
        }

        // FFT the beat frame and take magnitudes of the lower half

        m_fft->process(false, m_beatframe, m_fftRealOut, m_fftImagOut);

        for (size_t bin = 0; bin < halfFrame; ++bin) {
            newspec[bin] = sqrt(m_fftRealOut[bin] * m_fftRealOut[bin] +
                                m_fftImagOut[bin] * m_fftImagOut[bin]);
        }

        // Preserve peaks by applying adaptive threshold

        MathUtilities::adaptiveThreshold(newspec);

        // JS divergence against the previous beat's spectrum; the very
        // first beat has no predecessor to compare with

        if (beat > 0) {
            m_beatsd.push_back(measureSpecDiff(oldspec, newspec));
        }

        // The current spectrum becomes the previous one for the next beat

        for (size_t bin = 0; bin < halfFrame; ++bin) {
            oldspec[bin] = newspec[bin];
        }
    }

    // m_beatsd now holds all spectral difference measures

    int timesig = m_bpb;
    if (timesig == 0) timesig = 4;

    d_vec_t dbcand(timesig); // downbeat candidates

    const int transitions = static_cast<int>(m_beatsd.size());

    // Look for the beat position whose incoming transition shows the
    // greatest spectral change, averaged over all bars
    for (int beat = 0; beat < timesig; ++beat) {

        dbcand[beat] = 0;

        // Transition (beat - 1) leads into this beat.  Beat 0 has no
        // incoming transition in the first bar, so it starts one bar later.
        int example = (beat == 0) ? timesig - 1 : beat - 1;

        int count = 0;
        while (example < transitions) {
            dbcand[beat] += (m_beatsd[example]) / timesig;
            ++count;
            example += timesig;
        }

        if (count > 0) dbcand[beat] /= count;
    }

    // first downbeat is beat at index of maximum value of dbcand
    const int dbind = MathUtilities::getMax(dbcand);

    // remaining downbeats are at timesig intervals from the first
    const int beatCount = static_cast<int>(beats.size());
    for (int index = dbind; index < beatCount; index += timesig) {
        downbeats.push_back(index);
    }
}
Example #7
0
void
TempoTrackV2::filter_df(d_vec_t &df)
{
    // Low-pass filters df in place by running the same second-order IIR
    // filter over it twice: once front to back, then once back to front
    // over the result of the first pass.  Filter state starts from zero
    // for each pass.

    // equivalent in matlab to [b,a] = butter(2,0.4); a0 is 1 and so does
    // not appear in the difference equation
    const double a1 = -0.3695;
    const double a2 = 0.1958;
    const double b0 = 0.2066;
    const double b1 = 0.4131;
    const double b2 = 0.2066;

    d_vec_t lp_df(df.size());

    // filter memory: the two previous inputs and the two previous outputs
    double x1 = 0.;
    double x2 = 0.;
    double y1 = 0.;
    double y2 = 0.;

    // forwards filtering: df -> lp_df
    for (unsigned int k = 0; k < df.size(); k++)
    {
        const double x0 = df[k];
        const double y0 = b0*x0 + b1*x1 + b2*x2 - a1*y1 - a2*y2;
        x2 = x1;
        x1 = x0;
        y2 = y1;
        y1 = y0;
        lp_df[k] = y0;
    }

    // start the second pass from rest
    x1 = 0.; x2 = 0.;
    y1 = 0.; y2 = 0.;

    // backwards filtering: walk lp_df from its last element to its first
    // and write each output straight to the matching position in df, so
    // the result ends up in forward order without any explicit reversal
    for (unsigned int step = 0; step < df.size(); step++)
    {
        const unsigned int k = df.size() - 1 - step;

        const double x0 = lp_df[k];
        const double y0 = b0*x0 + b1*x1 + b2*x2 - a1*y1 - a2*y2;
        x2 = x1;
        x1 = x0;
        y2 = y1;
        y1 = y0;
        df[k] = y0;
    }
}
Example #8
0
void
TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
{
    // following Kevin Murphy's Viterbi decoding to get best path of
    // beat periods through rfcmat

    // make transition matrix
    d_mat_t tmat;
    for (unsigned int i=0;i<wv.size();i++)
    {
        tmat.push_back ( d_vec_t() ); // adds a new column
        for (unsigned int j=0; j<wv.size(); j++)
        {
            tmat[i].push_back(0.); // fill with zeros initially
        }
    }

    // variance of Gaussians in transition matrix
    // formed of Gaussians on diagonal - implies slow tempo change
    double sigma = 8.;
    // don't want really short beat periods, or really long ones
    for (unsigned int i=20;i <wv.size()-20; i++)
    {
        for (unsigned int j=20; j<wv.size()-20; j++)
        {
            double mu = static_cast<double>(i);
            tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
        }
    }

    // parameters for Viterbi decoding... this part is taken from
    // Murphy's matlab

    d_mat_t delta;
    i_mat_t psi;
    for (unsigned int i=0;i <rcfmat.size(); i++)
    {
        delta.push_back( d_vec_t());
        psi.push_back( i_vec_t());
        for (unsigned int j=0; j<rcfmat[i].size(); j++)
        {
            delta[i].push_back(0.); // fill with zeros initially
            psi[i].push_back(0); // fill with zeros initially
        }
    }


    unsigned int T = delta.size();

    if (T < 2) return; // can't do anything at all meaningful

    unsigned int Q = delta[0].size();

    // initialize first column of delta
    for (unsigned int j=0; j<Q; j++)
    {
        delta[0][j] = wv[j] * rcfmat[0][j];
        psi[0][j] = 0;
    }

    double deltasum = 0.;
    for (unsigned int i=0; i<Q; i++)
    {
        deltasum += delta[0][i];
    }
    for (unsigned int i=0; i<Q; i++)
    {
        delta[0][i] /= (deltasum + EPS);
    }


    for (unsigned int t=1; t<T; t++)
    {
        d_vec_t tmp_vec(Q);

        for (unsigned int j=0; j<Q; j++)
        {
            for (unsigned int i=0; i<Q; i++)
            {
                tmp_vec[i] = delta[t-1][i] * tmat[j][i];
            }

            delta[t][j] = get_max_val(tmp_vec);

            psi[t][j] = get_max_ind(tmp_vec);

            delta[t][j] *= rcfmat[t][j];
        }

        // normalise current delta column
        double deltasum = 0.;
        for (unsigned int i=0; i<Q; i++)
        {
            deltasum += delta[t][i];
        }
        for (unsigned int i=0; i<Q; i++)
        {
            delta[t][i] /= (deltasum + EPS);
        }
    }

    i_vec_t bestpath(T);
    d_vec_t tmp_vec(Q);
    for (unsigned int i=0; i<Q; i++)
    {
        tmp_vec[i] = delta[T-1][i];
    }

    // find starting point - best beat period for "last" frame
    bestpath[T-1] = get_max_ind(tmp_vec);

    // backtrace through index of maximum values in psi
    for (unsigned int t=T-2; t>0 ;t--)
    {
        bestpath[t] = psi[t+1][bestpath[t+1]];
    }

    // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
    bestpath[0] = psi[1][bestpath[1]];

    unsigned int lastind = 0;
    for (unsigned int i=0; i<T; i++)
    {
        unsigned int step = 128;
        for (unsigned int j=0; j<step; j++)
        {
            lastind = i*step+j;
            beat_period[lastind] = bestpath[i];
        }
//        std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl;
    }

    //fill in the last values...
    for (unsigned int i=lastind; i<beat_period.size(); i++)
    {
        beat_period[i] = beat_period[lastind];
    }

    for (unsigned int i = 0; i < beat_period.size(); i++)
    {
        tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
    }
}