void TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf) { // calculate autocorrelation function // then rcf // just hard code for now... don't really need separate functions to do this // make acf d_vec_t dfframe(dfframe_in); MathUtilities::adaptiveThreshold(dfframe); d_vec_t acf(dfframe.size()); for (unsigned int lag=0; lag<dfframe.size(); lag++) { double sum = 0.; double tmp = 0.; for (unsigned int n=0; n<(dfframe.size()-lag); n++) { tmp = dfframe[n] * dfframe[n+lag]; sum += tmp; } acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag)); } // now apply comb filtering int numelem = 4; for (unsigned int i = 2;i < rcf.size();i++) // max beat period { for (int a = 1;a <= numelem;a++) // number of comb elements { for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements { rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row } } } // apply adaptive threshold to rcf MathUtilities::adaptiveThreshold(rcf); double rcfsum =0.; for (unsigned int i=0; i<rcf.size(); i++) { rcf[i] += EPS ; rcfsum += rcf[i]; } // normalise rcf to sum to unity for (unsigned int i=0; i<rcf.size(); i++) { rcf[i] /= (rcfsum + EPS); } }
/**
 * Scale a vector in place by 1 / (sum of its elements + EPS).
 *
 * The EPS term keeps the divisor away from zero when the elements sum to
 * zero. An empty vector is left untouched.
 *
 * @param df  vector to normalise in place.
 */
void TempoTrackV2::normalise_vec(d_vec_t &df)
{
    double total = 0.;
    for (unsigned int k = 0; k < df.size(); ++k) {
        total += df[k];
    }

    const double denom = total + EPS;
    for (unsigned int k = 0; k < df.size(); ++k) {
        df[k] /= denom;
    }
}
/**
 * Return the largest element of df, with a floor of zero.
 *
 * The running maximum starts at 0, so the result is 0 for an empty vector
 * and also when no element is strictly positive.
 *
 * @param df  values to scan.
 * @return    max(0, largest element of df).
 */
double TempoTrackV2::get_max_val(const d_vec_t &df)
{
    double best = 0.;
    unsigned int k = 0;
    while (k < df.size()) {
        if (df[k] > best) {
            best = df[k];
        }
        ++k;
    }
    return best;
}
/**
 * Return the index of the largest strictly positive element of df.
 *
 * The comparison is strict, so the first occurrence wins on ties. Index 0 is
 * returned for an empty vector and when no element is greater than zero.
 *
 * @param df  values to scan.
 * @return    index of the first maximal positive element, or 0.
 */
int TempoTrackV2::get_max_ind(const d_vec_t &df)
{
    double best = 0.;
    int bestIndex = 0;
    unsigned int k = 0;
    while (k < df.size()) {
        if (df[k] > best) {
            best = df[k];
            bestIndex = k;
        }
        ++k;
    }
    return bestIndex;
}
/**
 * Jensen-Shannon divergence between two spectral frames.
 *
 * Only the first min(512, oldspec.size()/4) bins are compared. Within that
 * range each spectrum is offset by EPS and scaled to sum to one before the
 * divergence is accumulated with natural logarithms.
 *
 * Both spectra are taken by value because they are modified while being
 * normalised; the caller's vectors are unaffected.
 *
 * @param oldspec  magnitude spectrum of the previous frame; its length
 *                 determines how many bins are compared.
 * @param newspec  magnitude spectrum of the current frame; must hold at least
 *                 as many bins as are compared (not checked).
 * @return         the accumulated divergence (0 when no bins are compared).
 */
double DownBeat::measureSpecDiff(d_vec_t oldspec, d_vec_t newspec)
{
    // Compare at most the first 512 bins, and never more than a quarter of
    // the old spectrum.
    unsigned int nbins = 512;
    if (oldspec.size() / 4 < nbins) {
        nbins = oldspec.size() / 4;
    }

    // Pass 1: offset by EPS and total each spectrum.
    double newTotal = 0.;
    double oldTotal = 0.;
    for (unsigned int k = 0; k < nbins; ++k) {
        newspec[k] += EPS;
        oldspec[k] += EPS;
        newTotal += newspec[k];
        oldTotal += oldspec[k];
    }

    // Pass 2: normalise and accumulate the divergence bin by bin.
    double divergence = 0.;
    for (unsigned int k = 0; k < nbins; ++k) {
        newspec[k] /= newTotal;
        oldspec[k] /= oldTotal;

        // Zeros should not occur after the EPS offset, but guard anyway so
        // that log() is never evaluated at zero.
        if (newspec[k] == 0) {
            newspec[k] = 1.;
        }
        if (oldspec[k] == 0) {
            oldspec[k] = 1.;
        }

        // Entropy of the mixture minus the mean of the individual entropies.
        const double mix = 0.5*oldspec[k] + 0.5*newspec[k];
        divergence = divergence + (-mix*log(mix))
            + (0.5*(oldspec[k]*log(oldspec[k])))
            + (0.5*(newspec[k]*log(newspec[k])));
    }

    return divergence;
}
void DownBeat::findDownBeats(const float *audio, size_t audioLength, const d_vec_t &beats, i_vec_t &downbeats) { // FIND DOWNBEATS BY PARTITIONING THE INPUT AUDIO FILE INTO BEAT SEGMENTS // WHERE THE AUDIO FRAMES ARE DOWNSAMPLED BY A FACTOR OF 16 (fs ~= 2700Hz) // THEN TAKING THE JENSEN-SHANNON DIVERGENCE BETWEEN BEAT SYNCHRONOUS SPECTRAL FRAMES // IMPLEMENTATION (MOSTLY) FOLLOWS: // DAVIES AND PLUMBLEY "A SPECTRAL DIFFERENCE APPROACH TO EXTRACTING DOWNBEATS IN MUSICAL AUDIO" // EUSIPCO 2006, FLORENCE, ITALY d_vec_t newspec(m_beatframesize / 2); // magnitude spectrum of current beat d_vec_t oldspec(m_beatframesize / 2); // magnitude spectrum of previous beat m_beatsd.clear(); if (audioLength == 0) return; for (size_t i = 0; i + 1 < beats.size(); ++i) { // Copy the extents of the current beat from downsampled array // into beat frame buffer size_t beatstart = (beats[i] * m_increment) / m_factor; size_t beatend = (beats[i+1] * m_increment) / m_factor; if (beatend >= audioLength) beatend = audioLength - 1; if (beatend < beatstart) beatend = beatstart; size_t beatlen = beatend - beatstart; // Also apply a Hanning window to the beat frame buffer, sized // to the beat extents rather than the frame size. (Because // the size varies, it's easier to do this by hand than use // our Window abstraction.) 
// std::cerr << "beatlen = " << beatlen << std::endl; // float rms = 0; for (size_t j = 0; j < beatlen && j < m_beatframesize; ++j) { double mul = 0.5 * (1.0 - cos(TWO_PI * (double(j) / double(beatlen)))); m_beatframe[j] = audio[beatstart + j] * mul; // rms += m_beatframe[j] * m_beatframe[j]; } // rms = sqrt(rms); // std::cerr << "beat " << i << ": audio rms " << rms << std::endl; for (size_t j = beatlen; j < m_beatframesize; ++j) { m_beatframe[j] = 0.0; } // Now FFT beat frame m_fft->process(false, m_beatframe, m_fftRealOut, m_fftImagOut); // Calculate magnitudes for (size_t j = 0; j < m_beatframesize/2; ++j) { newspec[j] = sqrt(m_fftRealOut[j] * m_fftRealOut[j] + m_fftImagOut[j] * m_fftImagOut[j]); } // Preserve peaks by applying adaptive threshold MathUtilities::adaptiveThreshold(newspec); // Calculate JS divergence between new and old spectral frames if (i > 0) { // otherwise we have no previous frame m_beatsd.push_back(measureSpecDiff(oldspec, newspec)); // std::cerr << "specdiff: " << m_beatsd[m_beatsd.size()-1] << std::endl; } // Copy newspec across to old for (size_t j = 0; j < m_beatframesize/2; ++j) { oldspec[j] = newspec[j]; } } // We now have all spectral difference measures in specdiff int timesig = m_bpb; if (timesig == 0) timesig = 4; d_vec_t dbcand(timesig); // downbeat candidates for (int beat = 0; beat < timesig; ++beat) { dbcand[beat] = 0; } // look for beat transition which leads to greatest spectral change for (int beat = 0; beat < timesig; ++beat) { int count = 0; for (int example = beat-1; example < (int)m_beatsd.size(); example += timesig) { if (example < 0) continue; dbcand[beat] += (m_beatsd[example]) / timesig; ++count; } if (count > 0) dbcand[beat] /= count; // std::cerr << "dbcand[" << beat << "] = " << dbcand[beat] << std::endl; } // first downbeat is beat at index of maximum value of dbcand int dbind = MathUtilities::getMax(dbcand); // remaining downbeats are at timesig intervals from the first for (int i = dbind; i < 
(int)beats.size(); i += timesig) { downbeats.push_back(i); } }
/**
 * Low-pass filter a detection function in place, forwards then backwards.
 *
 * A second-order IIR filter (coefficients equivalent in MATLAB to
 * [b,a] = butter(2,0.4)) is run over the signal with zero initial state, the
 * result is time-reversed, and the same procedure is applied a second time.
 * The second reversal restores the original time direction, so the output is
 * the signal filtered once in each direction.
 *
 * @param df  detection function; overwritten with the filtered signal.
 */
void TempoTrackV2::filter_df(d_vec_t &df)
{
    // Denominator (a0 is 1 and therefore implicit) and numerator taps.
    const double a1 = -0.3695;
    const double a2 = 0.1958;
    const double b0 = 0.2066;
    const double b1 = 0.4131;
    const double b2 = 0.2066;

    d_vec_t filtered(df.size());

    // Pass 0 filters forwards; pass 1 filters the time-reversed result.
    for (int pass = 0; pass < 2; ++pass) {

        // Each pass starts from a zeroed filter state.
        double prevIn = 0.;
        double prevIn2 = 0.;
        double prevOut = 0.;
        double prevOut2 = 0.;

        for (unsigned int i = 0; i < df.size(); ++i) {
            filtered[i] = b0*df[i] + b1*prevIn + b2*prevIn2 - a1*prevOut - a2*prevOut2;
            prevIn2 = prevIn;
            prevIn = df[i];
            prevOut2 = prevOut;
            prevOut = filtered[i];
        }

        // Write the pass output back time-reversed: after pass 0 this readies
        // df for backwards filtering, after pass 1 it restores forward order.
        for (unsigned int i = 0; i < df.size(); ++i) {
            df[i] = filtered[df.size() - i - 1];
        }
    }
}
void TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi) { // following Kevin Murphy's Viterbi decoding to get best path of // beat periods through rfcmat // make transition matrix d_mat_t tmat; for (unsigned int i=0;i<wv.size();i++) { tmat.push_back ( d_vec_t() ); // adds a new column for (unsigned int j=0; j<wv.size(); j++) { tmat[i].push_back(0.); // fill with zeros initially } } // variance of Gaussians in transition matrix // formed of Gaussians on diagonal - implies slow tempo change double sigma = 8.; // don't want really short beat periods, or really long ones for (unsigned int i=20;i <wv.size()-20; i++) { for (unsigned int j=20; j<wv.size()-20; j++) { double mu = static_cast<double>(i); tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) ); } } // parameters for Viterbi decoding... this part is taken from // Murphy's matlab d_mat_t delta; i_mat_t psi; for (unsigned int i=0;i <rcfmat.size(); i++) { delta.push_back( d_vec_t()); psi.push_back( i_vec_t()); for (unsigned int j=0; j<rcfmat[i].size(); j++) { delta[i].push_back(0.); // fill with zeros initially psi[i].push_back(0); // fill with zeros initially } } unsigned int T = delta.size(); if (T < 2) return; // can't do anything at all meaningful unsigned int Q = delta[0].size(); // initialize first column of delta for (unsigned int j=0; j<Q; j++) { delta[0][j] = wv[j] * rcfmat[0][j]; psi[0][j] = 0; } double deltasum = 0.; for (unsigned int i=0; i<Q; i++) { deltasum += delta[0][i]; } for (unsigned int i=0; i<Q; i++) { delta[0][i] /= (deltasum + EPS); } for (unsigned int t=1; t<T; t++) { d_vec_t tmp_vec(Q); for (unsigned int j=0; j<Q; j++) { for (unsigned int i=0; i<Q; i++) { tmp_vec[i] = delta[t-1][i] * tmat[j][i]; } delta[t][j] = get_max_val(tmp_vec); psi[t][j] = get_max_ind(tmp_vec); delta[t][j] *= rcfmat[t][j]; } // normalise current delta column double deltasum = 0.; for (unsigned int i=0; i<Q; i++) { deltasum += delta[t][i]; } for (unsigned 
int i=0; i<Q; i++) { delta[t][i] /= (deltasum + EPS); } } i_vec_t bestpath(T); d_vec_t tmp_vec(Q); for (unsigned int i=0; i<Q; i++) { tmp_vec[i] = delta[T-1][i]; } // find starting point - best beat period for "last" frame bestpath[T-1] = get_max_ind(tmp_vec); // backtrace through index of maximum values in psi for (unsigned int t=T-2; t>0 ;t--) { bestpath[t] = psi[t+1][bestpath[t+1]]; } // weird but necessary hack -- couldn't get above loop to terminate at t >= 0 bestpath[0] = psi[1][bestpath[1]]; unsigned int lastind = 0; for (unsigned int i=0; i<T; i++) { unsigned int step = 128; for (unsigned int j=0; j<step; j++) { lastind = i*step+j; beat_period[lastind] = bestpath[i]; } // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl; } //fill in the last values... for (unsigned int i=lastind; i<beat_period.size(); i++) { beat_period[i] = beat_period[lastind]; } for (unsigned int i = 0; i < beat_period.size(); i++) { tempi.push_back((60. * m_rate / m_increment)/beat_period[i]); } }