int refineBeat(int isBeat) { BOOL accepted=FALSE; BOOL predicted=FALSE; BOOL resyncin=FALSE; BOOL resyncout=FALSE; if (isBeat) // Show the beat received from AVS SliderStep(IDC_IN, &inSlide); DWORD TCNow = GetTickCount(); if (songChanged(TCNow)) { bestConfidence=(int)((float)bestConfidence*0.5); sticked=0; stickyConfidenceCount=0; if (cfg_smartbeatresetnewsong) ResetAdapt(); } // Try to predict if this frame should be a beat if (Bpm && TCNow > predictionLastTC + (60000 / Bpm)) predicted = TRUE; if (isBeat) // If it is a real beat, do discrimination/guessing and computations, then see if it is accepted accepted = TCHistStep(TCHist, Avg, halfDiscriminated, &hdPos, &lastTC, TCNow, BEAT_REAL); // Calculate current Bpm CalcBPM(); // If prediction Bpm has not yet been set // or if prediction bpm is too high or too low // or if 3/4 of our history buffer contains beats within the range of typical drift // the accept the calculated Bpm as the new prediction Bpm // This allows keeping the beat going on when the music fades out, and readapt to the new beat as soon as // the music fades in again if ((accepted || predicted) && !sticked && (!predictionBpm || predictionBpm > MAX_BPM || predictionBpm < MIN_BPM)) { if (Confidence >= bestConfidence) { /* betterConfidenceCount++; if (!predictionBpm || betterConfidenceCount == BETTER_CONF_ADOPT) {*/ forceNewBeat=1; /* betterConfidenceCount=0; }*/ } if (Confidence >= 50) { topConfidenceCount++; if (topConfidenceCount == TOP_CONF_ADOPT) { forceNewBeat=1; topConfidenceCount=0; } } if (forceNewBeat) { forceNewBeat=0; bestConfidence = Confidence; predictionBpm=Bpm; } } if (!sticked) predictionBpm = Bpm; Bpm=predictionBpm; /* resync = (predictionBpm && (predictionLastTC < TCNow - (30000/predictionBpm) - (60000/predictionBpm)*0.2) || (predictionLastTC < TCNow - (30000/predictionBpm) - (60000/predictionBpm)*0.2));*/ if (predictionBpm && accepted && !predicted) { int b; if (TCNow > predictionLastTC + (60000 / predictionBpm)*0.7) { resyncin = TRUE; b = (int)((float)predictionBpm * 1.01); } if (TCNow < predictionLastTC + (60000 / predictionBpm)*0.3) { int b; resyncout = TRUE; b = (int)((float)predictionBpm * 0.98); } if (!sticked && doResyncBpm && (resyncin || resyncout)) { newBpm(b); predictionBpm = GetBpm(); } } if (resyncin) { predictionLastTC = TCNow; SliderStep(IDC_OUT, &outSlide); doResyncBpm=TRUE; return ((cfg_smartbeat && !cfg_smartbeatonlysticky) || (cfg_smartbeat && cfg_smartbeatonlysticky && sticked)) ? 1 : isBeat; } if (predicted) { predictionLastTC = TCNow; if (Confidence > 25) TCHistStep(TCHist, Avg, halfDiscriminated, &hdPos, &lastTC, TCNow, BEAT_GUESSED); SliderStep(IDC_OUT, &outSlide); doResyncBpm=FALSE; return ((cfg_smartbeat && !cfg_smartbeatonlysticky) || (cfg_smartbeat && cfg_smartbeatonlysticky && sticked)) ? 1 : isBeat; } if (resyncout) { predictionLastTC = TCNow; doResyncBpm=TRUE; return ((cfg_smartbeat && !cfg_smartbeatonlysticky) || (cfg_smartbeat && cfg_smartbeatonlysticky && sticked)) ? 0 : isBeat; } return ((cfg_smartbeat && !cfg_smartbeatonlysticky) || (cfg_smartbeat && cfg_smartbeatonlysticky && sticked)) ? (predictionBpm ? 0 : isBeat) : isBeat; }
// configuration dialog stuff BOOL CALLBACK DlgProc_Bpm(HWND hwndDlg, UINT uMsg, WPARAM wParam,LPARAM lParam) { switch (uMsg) { case WM_INITDIALOG: inInc = 1; outInc = 1; inSlide = 0; outSlide = 0; oldDisplayBpm=-1; oldDisplayConfidence=-1; oldInSlide=-1; oldOutSlide=-1; if (cfg_smartbeat) CheckDlgButton(hwndDlg,IDC_BPMADV,BST_CHECKED); else CheckDlgButton(hwndDlg,IDC_BPMSTD,BST_CHECKED); if (cfg_smartbeatsticky) CheckDlgButton(hwndDlg,IDC_STICKY,BST_CHECKED); if (cfg_smartbeatresetnewsong) CheckDlgButton(hwndDlg,IDC_NEWRESET,BST_CHECKED); else CheckDlgButton(hwndDlg,IDC_NEWADAPT,BST_CHECKED); if (cfg_smartbeatonlysticky) CheckDlgButton(hwndDlg,IDC_ONLYSTICKY,BST_CHECKED); SendDlgItemMessage(hwndDlg, IDC_IN, TBM_SETTICFREQ, 1, 0); SendDlgItemMessage(hwndDlg, IDC_IN, TBM_SETRANGE, TRUE, MAKELONG(0, 8)); SendDlgItemMessage(hwndDlg, IDC_OUT, TBM_SETTICFREQ, 1, 0); SendDlgItemMessage(hwndDlg, IDC_OUT, TBM_SETRANGE, TRUE, MAKELONG(0, 8)); if (predictionBpm) { ShowWindow(GetDlgItem(hwndDlg, IDC_STICK), sticked ? SW_HIDE : SW_NORMAL); ShowWindow(GetDlgItem(hwndDlg, IDC_UNSTICK), sticked ? SW_NORMAL : SW_HIDE); } else { ShowWindow(GetDlgItem(hwndDlg, IDC_STICK), SW_HIDE); ShowWindow(GetDlgItem(hwndDlg, IDC_UNSTICK), SW_HIDE); } /* ShowWindow(GetDlgItem(hwndDlg, IDC_CURBPM), cfg_smartbeat ? SW_NORMAL : SW_HIDE); ShowWindow(GetDlgItem(hwndDlg, IDC_CURCONF), cfg_smartbeat ? SW_NORMAL : SW_HIDE); ShowWindow(GetDlgItem(hwndDlg, IDC_BPM), cfg_smartbeat ? SW_NORMAL : SW_HIDE); ShowWindow(GetDlgItem(hwndDlg, IDC_CONFIDENCE), cfg_smartbeat ? SW_NORMAL : SW_HIDE); ShowWindow(GetDlgItem(hwndDlg, IDC_RESET), cfg_smartbeat ? SW_NORMAL : SW_HIDE);*/ SetTimer(hwndDlg, 0, 50, NULL); return 1; case WM_TIMER: { if (oldInSlide != inSlide) { SendDlgItemMessage(hwndDlg, IDC_IN, TBM_SETPOS, TRUE, inSlide); oldInSlide=inSlide; } if (oldOutSlide != outSlide) { SendDlgItemMessage(hwndDlg, IDC_OUT, TBM_SETPOS, TRUE, outSlide); oldOutSlide=outSlide; } if (oldDisplayBpm != predictionBpm || oldsticked != sticked) { wsprintf(txt, predictionBpm ? "%d%s"/*/%d"*/ : "Learning...", predictionBpm, cfg_smartbeatsticky && sticked ? " Got it!" : ""/*, Bpm*/); SetDlgItemText(hwndDlg, IDC_BPM, txt); oldDisplayBpm=predictionBpm; oldsticked=sticked; if (predictionBpm) { ShowWindow(GetDlgItem(hwndDlg, IDC_STICK), sticked ? SW_HIDE : SW_NORMAL); ShowWindow(GetDlgItem(hwndDlg, IDC_UNSTICK), sticked ? SW_NORMAL : SW_HIDE); } else { ShowWindow(GetDlgItem(hwndDlg, IDC_STICK), SW_HIDE); ShowWindow(GetDlgItem(hwndDlg, IDC_UNSTICK), SW_HIDE); } } if (oldDisplayConfidence != Confidence) { wsprintf(txt, "%d%%"/* (%d%%/%d%% - %d)"*/, Confidence/*, Confidence1, Confidence2, TCUsed*/); SetDlgItemText(hwndDlg, IDC_CONFIDENCE, txt); oldDisplayConfidence=Confidence; } } return 0; case WM_COMMAND: if ((LOWORD(wParam) == IDC_BPMSTD) || (LOWORD(wParam) == IDC_BPMADV) || (LOWORD(wParam) == IDC_NEWRESET) || (LOWORD(wParam) == IDC_NEWADAPT) || (LOWORD(wParam) == IDC_ONLYSTICKY) || (LOWORD(wParam) == IDC_STICKY)) { cfg_smartbeat=IsDlgButtonChecked(hwndDlg,IDC_BPMADV)?1:0; cfg_smartbeatsticky=IsDlgButtonChecked(hwndDlg,IDC_STICKY)?1:0; cfg_smartbeatresetnewsong=IsDlgButtonChecked(hwndDlg,IDC_NEWRESET)?1:0; cfg_smartbeatonlysticky=IsDlgButtonChecked(hwndDlg,IDC_ONLYSTICKY)?1:0; oldsticked=-1; /* ShowWindow(GetDlgItem(hwndDlg, IDC_CURBPM), cfg_smartbeat ? SW_NORMAL : SW_HIDE); ShowWindow(GetDlgItem(hwndDlg, IDC_CURCONF), cfg_smartbeat ? SW_NORMAL : SW_HIDE); ShowWindow(GetDlgItem(hwndDlg, IDC_BPM), cfg_smartbeat ? SW_NORMAL : SW_HIDE); ShowWindow(GetDlgItem(hwndDlg, IDC_CONFIDENCE), cfg_smartbeat ? SW_NORMAL : SW_HIDE); ShowWindow(GetDlgItem(hwndDlg, IDC_RESET), cfg_smartbeat ? SW_NORMAL : SW_HIDE);*/ } if (LOWORD(wParam) == IDC_2X) doubleBeat(); if (LOWORD(wParam) == IDC_DIV2) halfBeat(); if (LOWORD(wParam) == IDC_RESET) ResetAdapt(); if (LOWORD(wParam) == IDC_STICK) { sticked=1; stickyConfidenceCount=0; } if (LOWORD(wParam) == IDC_UNSTICK) { sticked=0; stickyConfidenceCount=0; } return 0; case WM_DESTROY: KillTimer(hwndDlg, 0); return 0; } return 0; }
// Calculate BPM according to beat history void CalcBPM(void) { int i; int hdCount=0; int r=0; int totalTC=0, totalN=0; float rC, etC; int v; double sc=0; int mx=0; float et; int smSum=0, smN=0; if (!ReadyToLearn()) return; // First calculate average beat for (i=0;i<TCHistSize-1;i++) totalTC += TCHist[i].TC - TCHist[i+1].TC; Avg = totalTC/(TCHistSize-1); // Count how many of then are real as opposed to guessed for (i=0;i<TCHistSize;i++) if (TCHist[i].Type == BEAT_REAL) r++; // Calculate part 1 of confidence rC = (float)min((float)((float)r / (float)TCHistSize) * 2, 1); // Calculate typical drift for (i=0;i<TCHistSize-1;i++) { v = TCHist[i].TC - TCHist[i+1].TC; mx = max(mx, v); sc += v*v; } et = (float)sqrt(sc / (TCHistSize-1) - Avg*Avg); // Calculate confidence based on typical drift and max derivation etC = 1 - ((float)et / (float)mx); // Calculate confidence Confidence = max(0, (int)(((rC * etC) * 100.0) - 50) * 2); Confidence1 = (int)(rC * 100); Confidence2 = (int)(etC * 100); // Now apply second layer, recalculate average using only beats within range of typical drift // Also, count how many of them we are keeping totalTC=0; for (i=0;i<TCHistSize-1;i++) { v += TCHist[i].TC - TCHist[i+1].TC; if (abs(Avg-v) < et) { totalTC += v; totalN++; v = 0; } else if ((float)v > Avg) v = 0; } TCUsed = totalN; // If no beat was within typical drift (how would it be possible? well lets cover our ass) then keep the simple // average calculated earlier, else recalculate average of beats within range if (totalN) Avg = totalTC/totalN; if (ReadyToGuess()) { if (Avg) // Avg = 0 ? Ahem.. Bpm = 60000 / Avg; if (Bpm != lastBPM) { newBpm(Bpm); // If realtime Bpm has changed since last time, then insert it in the smoothing tab;e lastBPM = Bpm; if (cfg_smartbeatsticky && predictionBpm && Confidence >= ((predictionBpm < 90) ? STICKY_THRESHOLD_LOW : STICKY_THRESHOLD)) { stickyConfidenceCount++; if (stickyConfidenceCount >= MIN_STICKY) sticked=1; } else stickyConfidenceCount=0; } Bpm = GetBpm(); // Count how many beats we discriminated for (i=0;i<TCHistSize;i++) if (halfDiscriminated[i]) hdCount++; if (hdCount >= TCHistSize/2) // If we removed at least half of our beats, then we are off course. We should double our bpm { if (Bpm * 2 < MAX_BPM) // Lets do so only if the doubled bpm is < MAX_BPM { doubleBeat(); memset(halfDiscriminated, 0, TCHistSize*sizeof(int)); // Reset discrimination table } } if (Bpm > 500 || Bpm < 0) { ResetAdapt(); } if (Bpm < MIN_BPM) { if (++doubleCount > 4) // We're going too slow, lets double our bpm doubleBeat(); } else doubleCount=0; if (Bpm > MAX_BPM) // We're going too fast, lets slow our bpm by a factor of 2 { if (++halfCount > 4) halfBeat(); } else halfCount=0; } }
int main(int argc, char *argv[]) { char *s; char *labfn; int numUtt; void Initialise(void); void DoGeneration(char *labfn); if (InitShell(argc, argv, hmgens_version, hmgens_vc_id) < SUCCESS) HError(2300, "HMGenS: InitShell failed"); InitMem(); InitMath(); InitSigP(); InitWave(); InitLabel(); InitModel(); if (InitParm() < SUCCESS) HError(2300, "HMGenS: InitParm failed"); InitUtil(); InitFB(); InitAdapt(&xfInfo_hmm, &xfInfo_dur); InitMap(); InitGen(); if (NumArgs() == 0) ReportUsage(); CreateHeap(&genStack, "genStore", MSTAK, 1, 1.0, 80000, 400000); CreateHeap(&uttStack, "uttStore", MSTAK, 1, 0.5, 100, 1000); CreateHeap(&fbInfoStack, "FBInfoStore", MSTAK, 1, 0.5, 100, 1000); CreateHeap(&hmmStack, "HmmStore", MSTAK, 1, 1.0, 50000, 500000); CreateHeap(&dmStack, "dmStore", MSTAK, 1, 1.0, 50000, 500000); SetConfParms(); CreateHMMSet(&hmset, &hmmStack, TRUE); CreateHMMSet(&dmset, &dmStack, TRUE); utt = (UttInfo *) New(&uttStack, sizeof(UttInfo)); genInfo = (GenInfo *) New(&genStack, sizeof(GenInfo)); fbInfo = (FBInfo *) New(&fbInfoStack, sizeof(FBInfo)); while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s) != 1) HError(9919, "HMGenS: Bad switch %s; must be single letter", s); switch (s[0]) { case 'a': xfInfo_hmm.useInXForm = TRUE; break; case 'b': xfInfo_dur.useInXForm = TRUE; break; case 'c': if (NextArg() != INTARG) HError(2119, "HMGenS: Parameter generation algorithm type value expected"); type = (ParmGenType) GetChkedInt(CHOLESKY, FB, s); break; case 'd': if (NextArg() != STRINGARG) HError(2319, "HMGenS: HMM definition directory expected"); hmmDir = GetStrArg(); break; case 'e': useAlign = TRUE; break; case 'f': frameRate = (HTime) GetChkedFlt(0.0, 10000000.0, s); break; case 'g': minFrwdP = GetChkedFlt(0.0, 1000.0, s); break; case 'h': if (NextArg() != STRINGARG) HError(1, "Speaker name pattern expected"); xfInfo_hmm.inSpkrPat = xfInfo_dur.inSpkrPat = GetStrArg(); if (NextArg() == STRINGARG) xfInfo_hmm.paSpkrPat = xfInfo_dur.paSpkrPat = GetStrArg(); if (NextArg() != SWITCHARG) HError(2319, "HMGenS: cannot have -h as the last option"); break; case 'm': modelAlign = TRUE; break; case 'n': if (NextArg() != STRINGARG) HError(2319, "HMGenS: duration model definition directory expected"); dmDir = GetStrArg(); break; case 'p': outPdf = TRUE; break; case 'r': if (NextArg() != FLOATARG) HError(2119, "HMGenS: Speaking rate value (float) expected"); speakRate = GetChkedFlt(0.0, 3.0, s); break; case 's': stateAlign = TRUE; break; case 't': pruneInit = GetChkedFlt(0.0, 1.0E20, s); if (NextArg() == FLOATARG || NextArg() == INTARG) { pruneInc = GetChkedFlt(0.0, 1.0E20, s); pruneLim = GetChkedFlt(0.0, 1.0E20, s); } else { pruneInc = 0.0; pruneLim = pruneInit; } break; case 'v': MSDthresh = GetChkedFlt(0.0, 1.0, s); break; case 'x': if (NextArg() != STRINGARG) HError(2319, "HMGenS: HMM file extension expected"); hmmExt = GetStrArg(); break; case 'y': if (NextArg() != STRINGARG) HError(2319, "HMGenS: duration model file extension expected"); dmExt = GetStrArg(); break; case 'B': inBinary = TRUE; break; case 'E': if (NextArg() != STRINGARG) HError(2319, "HMGenS: parent transform directory expected"); xfInfo_hmm.usePaXForm = TRUE; xfInfo_hmm.paXFormDir = GetStrArg(); if (NextArg() == STRINGARG) xfInfo_hmm.paXFormExt = GetStrArg(); if (NextArg() != SWITCHARG) HError(2319, "HMGenS: cannot have -E as the last option"); break; case 'G': if (NextArg() != STRINGARG) HError(2119, "HMGenS: Label File format expected"); if ((lff = Str2Format(GetStrArg())) == ALIEN) HError(-2189, "HMGenS: Warning ALIEN Label file format set"); break; case 'H': if (NextArg() != STRINGARG) HError(3219, "HMGenS: HMM MMF File name expected"); AddMMF(&hmset, GetStrArg()); break; case 'I': if (NextArg() != STRINGARG) HError(2319, "HMGenS: MLF file name expected"); LoadMasterFile(GetStrArg()); break; case 'J': if (NextArg() != STRINGARG) HError(2319, "HMGenS: input transform directory expected"); AddInXFormDir(&hmset, GetStrArg()); if (NextArg() == STRINGARG) xfInfo_hmm.inXFormExt = GetStrArg(); if (NextArg() != SWITCHARG) HError(2319, "HMGenS: cannot have -J as the last option"); break; case 'L': if (NextArg() != STRINGARG) HError(2319, "HMGenS: Label file directory expected"); labDir = GetStrArg(); break; case 'M': if (NextArg() != STRINGARG) HError(2319, "HMGenS: Output macro file directory expected"); genDir = GetStrArg(); break; case 'N': if (NextArg() != STRINGARG) HError(3219, "HMGenS: Duration MMF File name expected"); AddMMF(&dmset, GetStrArg()); break; case 'T': if (NextArg() != INTARG) HError(2119, "HMGenS: Trace value expected"); trace = GetChkedInt(0, 0002, s); break; case 'W': if (NextArg() != STRINGARG) HError(2319, "HMGenS: parent duration transform directory expected"); xfInfo_dur.usePaXForm = TRUE; xfInfo_dur.paXFormDir = GetStrArg(); if (NextArg() == STRINGARG) xfInfo_dur.paXFormExt = GetStrArg(); if (NextArg() != SWITCHARG) HError(2319, "HMGenS: cannot have -W as the last option"); break; case 'X': if (NextArg() != STRINGARG) HError(2319, "HMGenS: Label file extension expected"); labExt = GetStrArg(); break; case 'Y': if (NextArg() != STRINGARG) HError(2319, "HMGenS: input duration transform directory expected"); AddInXFormDir(&dmset, GetStrArg()); if (NextArg() == STRINGARG) { if (xfInfo_dur.inXFormExt == NULL) xfInfo_dur.inXFormExt = GetStrArg(); else HError(2319, "MGenS: only one input duration transform extension may be specified"); } if (NextArg() != SWITCHARG) HError(2319, "HMGenS: cannot have -Y as the last option"); break; default: HError(9919, "HMGenS: Unknown switch %s", s); } } if (NextArg() != STRINGARG) HError(2319, "HMGenS: file name of vocabulary list expected"); Initialise(); InitUttInfo(utt, FALSE); numUtt = 1; if (trace & T_TOP) SetTraceGen(); /* generate parameter sequences */ do { if (NextArg() != STRINGARG) HError(2319, "HMGenS: data file name expected"); labfn = GetStrArg(); /* track speakers */ if (UpdateSpkrStats(&hmset, &xfInfo_hmm, labfn)) { if (!xfInfo_hmm.useInXForm) xfInfo_hmm.inXForm = NULL; } if (UpdateSpkrStats(&dmset, &xfInfo_dur, labfn)) { if (!xfInfo_dur.useInXForm) xfInfo_dur.inXForm = NULL; else ResetDMMPreComps(&dmset); } fbInfo->xfinfo_hmm = &xfInfo_hmm; fbInfo->xfinfo_dur = &xfInfo_dur; fbInfo->inXForm_hmm = xfInfo_hmm.inXForm; fbInfo->inXForm_dur = xfInfo_dur.inXForm; fbInfo->al_inXForm_hmm = xfInfo_hmm.al_inXForm; fbInfo->al_inXForm_dur = xfInfo_dur.al_inXForm; fbInfo->paXForm_hmm = xfInfo_hmm.paXForm; fbInfo->paXForm_dur = xfInfo_dur.paXForm; /* generate parameters */ DoGeneration(labfn); numUtt++; } while (NumArgs() > 0); if (trace & T_TOP) { printf("Generation complete - average log prob per frame = %e (%d frames)\n", totalPr / totalT, totalT); } /* Reset stacks */ Dispose(&fbInfoStack, fbInfo); Dispose(&genStack, genInfo); Dispose(&uttStack, utt); ResetHeap(&fbInfoStack); ResetHeap(&uttStack); ResetHeap(&genStack); ResetHeap(&dmStack); ResetHeap(&hmmStack); /* Reset modules */ ResetGen(); ResetAdapt(&xfInfo_hmm, &xfInfo_dur); ResetFB(); ResetUtil(); ResetParm(); ResetModel(); ResetLabel(); ResetWave(); ResetSigP(); ResetMath(); ResetMem(); ResetShell(); Exit(0); return (0); /* never reached -- make compiler happy */ }