void SVSBuilder::BuildDF(int y, int x, DecisionFunction * df) { df->Reset(Gamma, SVM().Rho); PointType const& point = Input->at(x, y); float const rawIndex = Pixel2RawIndex[y * Width_ + x]; // always +5 to for feeling safe... float const kernelRadiusInPixels = (int)ceil(KernelRadius / LocalResolution[rawIndex]) + 5; Grid2SV_.TraverseRectangle(y, x, kernelRadiusInPixels, [this, &point, &df] (int /*y*/, int /*x*/, int idx) { PointType const& sv = Objects->operator[](idx); if (pcl::squaredEuclideanDistance(point, sv) <= KernelRadius2) { df->AddSupportVector(sv, SVM().Alphas()[idx]); } return true; }); }
/// Fills Grid2SV_ with the indices of the objects that act as support vectors.
///
/// Grid2SV_ is resized to Height_ x Width_. Then, for every object index whose
/// alpha is strictly positive, the index is appended to the grid cell that
/// Num2Grid_ associates with that object (pair: first, second).
void SVSBuilder::BuildGrid2SV() {
    Grid2SV_.Resize(Height_, Width_);

    auto const objectCount = Objects->size();
    for (int objectIdx = 0; objectIdx < objectCount; ++objectIdx) {
        // Objects whose alpha is not strictly positive are not recorded.
        if (!(SVM().Alphas()[objectIdx] > 0)) {
            continue;
        }

        auto const cell = Num2Grid_[objectIdx];
        Grid2SV_.at(cell.first, cell.second).push_back(objectIdx);
    }
}
void setup() { stream.setLabelsForAllDimensions({"audio"}); pipeline.addFeatureExtractionModule( FFT(kFftWindowSize, kFftHopSize, DIM, FFT::HAMMING_WINDOW, true, false)); MFCC::Options options; options.sample_rate = kSampleRate; options.fft_size = kFftWindowSize / 2; options.start_freq = 300; options.end_freq = 3700; options.num_tri_filter = 26; options.num_cepstral_coeff = 12; options.lifter_param = 22; options.use_vad = true; options.noise_level = noise_level; pipeline.addFeatureExtractionModule(MFCC(options)); pipeline.setClassifier(SVM()); // GMM(16, true, false, 1, 100, 0.001)); // In post processing, we wait #n predicitons. If m out of n predictions are // from the same class, we declare the class as the right one. // // n = (duration * sample_rate) / frame_size // where duration = post_duration // sample_rate = kSampleRate // frame_size = kFftHopSize // m = n * post_ratio int num_predictions = post_duration / 1000 * kSampleRate / kFftHopSize; pipeline.addPostProcessingModule( ClassLabelFilter(num_predictions * post_ratio, num_predictions)); auto ratio_updater = [](double new_ratio) { ClassLabelFilter* filter = dynamic_cast<ClassLabelFilter*>(pipeline.getPostProcessingModule(0)); // Recalculate num_predictions as post_duration might have been changed int num_predictions = post_duration / 1000 * kSampleRate / kFftHopSize; filter->setMinimumCount(new_ratio * num_predictions); }; auto duration_updater = [](int new_duration) { ClassLabelFilter* filter = dynamic_cast<ClassLabelFilter*>(pipeline.getPostProcessingModule(0)); // Recalculate num_predictions as post_duration might have been changed int num_predictions = post_duration / 1000 * kSampleRate / kFftHopSize; filter->setBufferSize(num_predictions); }; auto noise_updater = [](int new_noise_level) { MFCC *mfcc = dynamic_cast<MFCC*>(pipeline.getFeatureExtractionModule(1)); mfcc->setNoiseLevel(new_noise_level); }; registerTuneable(noise_level, 0, 20, "Noise Level", "The threshold for the system to distinguish 
between " "ambient noise and speech/sound", noise_updater); registerTuneable(post_duration, 0, 2000, "Duration", "Time (in ms) that is considered as a whole " "for smoothing the prediction", duration_updater); registerTuneable(post_ratio, 0.0f, 1.0f, "Ratio", "The portion of time in duration that " "should be from the same class", ratio_updater); useInputStream(stream); useOutputStream(oStream); usePipeline(pipeline); useLeaveOneOutScoring(false); setGUIBufferSize(kSampleRate); }