示例#1
0
文件: svs.cpp 项目: rizar/rossvs
void SVSBuilder::BuildDF(int y, int x, DecisionFunction * df) {
    df->Reset(Gamma, SVM().Rho);
    PointType const& point = Input->at(x, y);

    float const rawIndex = Pixel2RawIndex[y * Width_ + x];
    // always +5 to for feeling safe...
    float const kernelRadiusInPixels = (int)ceil(KernelRadius / LocalResolution[rawIndex]) + 5;

    Grid2SV_.TraverseRectangle(y, x, kernelRadiusInPixels,
            [this, &point, &df] (int /*y*/, int /*x*/, int idx) {
                PointType const& sv = Objects->operator[](idx);
                if (pcl::squaredEuclideanDistance(point, sv) <= KernelRadius2) {
                    df->AddSupportVector(sv, SVM().Alphas()[idx]);
                }
                return true;
            });
}
示例#2
0
文件: svs.cpp 项目: rizar/rossvs
// Rebuilds Grid2SV_: resizes it to Height_ x Width_ and, for every object
// whose SVM alpha is strictly positive, appends the object's index to the
// grid cell given by Num2Grid_ for that object.
void SVSBuilder::BuildGrid2SV() {
    Grid2SV_.Resize(Height_, Width_);
    // Convert the container size once so the loop compares int with int
    // instead of mixing a signed index with an unsigned size.
    int const objectCount = static_cast<int>(Objects->size());
    for (int i = 0; i < objectCount; ++i) {
        if (SVM().Alphas()[i] > 0) {
            auto const& pos = Num2Grid_[i];
            Grid2SV_.at(pos.first, pos.second).push_back(i);
        }
    }
}
示例#3
0
void setup() {
    stream.setLabelsForAllDimensions({"audio"});

    pipeline.addFeatureExtractionModule(
        FFT(kFftWindowSize, kFftHopSize,
            DIM, FFT::HAMMING_WINDOW, true, false));

    MFCC::Options options;
    options.sample_rate = kSampleRate;
    options.fft_size = kFftWindowSize / 2;
    options.start_freq = 300;
    options.end_freq = 3700;
    options.num_tri_filter = 26;
    options.num_cepstral_coeff = 12;
    options.lifter_param = 22;
    options.use_vad = true;
    options.noise_level = noise_level;

    pipeline.addFeatureExtractionModule(MFCC(options));

    pipeline.setClassifier(SVM());
    // GMM(16, true, false, 1, 100, 0.001));

    // In post processing, we wait #n predicitons. If m out of n predictions are
    // from the same class, we declare the class as the right one.
    //
    // n = (duration * sample_rate) / frame_size
    //   where duration    = post_duration
    //         sample_rate = kSampleRate
    //         frame_size  = kFftHopSize
    // m = n * post_ratio
    int num_predictions = post_duration / 1000 * kSampleRate / kFftHopSize;
    pipeline.addPostProcessingModule(
            ClassLabelFilter(num_predictions * post_ratio, num_predictions));

    auto ratio_updater = [](double new_ratio) {
        ClassLabelFilter* filter =
            dynamic_cast<ClassLabelFilter*>(pipeline.getPostProcessingModule(0));
        // Recalculate num_predictions as post_duration might have been changed
        int num_predictions = post_duration / 1000 * kSampleRate / kFftHopSize;
        filter->setMinimumCount(new_ratio * num_predictions);
    };

    auto duration_updater = [](int new_duration) {
        ClassLabelFilter* filter =
            dynamic_cast<ClassLabelFilter*>(pipeline.getPostProcessingModule(0));
        // Recalculate num_predictions as post_duration might have been changed
        int num_predictions = post_duration / 1000 * kSampleRate / kFftHopSize;
        filter->setBufferSize(num_predictions);
    };

    auto noise_updater = [](int new_noise_level) {
        MFCC *mfcc = dynamic_cast<MFCC*>(pipeline.getFeatureExtractionModule(1));
        mfcc->setNoiseLevel(new_noise_level);
    };

    registerTuneable(noise_level, 0, 20,
                     "Noise Level",
                     "The threshold for the system to distinguish between "
                     "ambient noise and speech/sound",
                     noise_updater);

    registerTuneable(post_duration, 0, 2000,
                     "Duration",
                     "Time (in ms) that is considered as a whole "
                     "for smoothing the prediction",
                     duration_updater);

    registerTuneable(post_ratio, 0.0f, 1.0f,
                     "Ratio",
                     "The portion of time in duration that "
                     "should be from the same class",
                     ratio_updater);

    useInputStream(stream);
    useOutputStream(oStream);
    usePipeline(pipeline);
    useLeaveOneOutScoring(false);
    setGUIBufferSize(kSampleRate);
}