CClassifier * train(EClassifierType clt, CCaptcha * captcha, unsigned train_vec, unsigned test_num)
{
	CClassifier * cl = NULL;

	try
	{
		throw_null((cl = new CClassifier(clt)));

		unsigned u, t, num, yes;
		Mat train_data(train_vec, dim, CV_32FC1);
		Mat responses(train_vec, 1, CV_32SC1);

		// Build the training set: one randomly generated digit captcha per row
		for(u = 0; u < train_vec; u++)
		{
			num = rand() % 10;

			Mat img = (* captcha)(num);
			Mat vc = vec(img);

			for(t = 0; t < dim; t++)
				train_data.at<float>(u, t) = vc.at<float>(0, t);

			responses.at<int>(u, 0) = num;
		}

		cl->train(train_data, responses);

		// Evaluate on freshly generated captchas
		for(u = 0, yes = 0; u < test_num; u++)
		{
			num = rand() % 10;

			Mat img = (* captcha)(num);
			Mat vc = vec(img);

			if(num == cl->predict(vc))
				yes++;
		}

		printf("Correctly classified vectors: %u of %u (%f %%)\n", yes, test_num, yes * 100 / (double) test_num);
	}
	catch(...)
	{
		if(cl != NULL)
			delete cl;

		cl = NULL;
	}

	return cl;
}
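// The function above relies on a vec() helper and a global `dim` that are not shown here.
// A minimal sketch of what such a helper might look like, assuming the captcha image is a
// single-channel Mat that is resized, normalized and flattened into a 1 x dim float row
// (the fixed 28x28 size and the scaling are illustrative assumptions, not the project's code):
#include <opencv2/opencv.hpp>

using cv::Mat;

static const unsigned dim = 28 * 28; // assumed feature dimension

Mat vec(const Mat & img)
{
	Mat resized, flat;

	// Bring every captcha to a fixed size so the feature vector length is constant
	cv::resize(img, resized, cv::Size(28, 28));

	// Convert to float and scale pixel values to [0, 1]
	resized.convertTo(flat, CV_32FC1, 1.0 / 255.0);

	// Flatten to a single row of dim elements
	return flat.reshape(1, 1);
}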
double Svm::classify() {
    if (data->train_projections.empty()) {
        return -1.;
    }

    int train_size = int(data->train_projections.size());
    int components = data->eigenvalues.rows;

    // Pack the per-sample PCA projections into a single training matrix
    Mat train_data(train_size, components, CV_32FC1);
    Mat label_data(train_size, 1, CV_32SC1);
    for (int i = 0; i < train_size; i++) {
        data->train_projections[i].row(0).copyTo(train_data.row(i));
        label_data.at<int>(i, 0) = data->labels[i];
    }

    // RBF-kernel C-SVC with fixed hyperparameters
    Ptr<ml::SVM> svm = ml::SVM::create();
    svm->setType(ml::SVM::C_SVC);
    svm->setGamma(0.0001);
    svm->setC(2000);
    svm->setKernel(ml::SVM::RBF);
    svm->train(train_data, ml::ROW_SAMPLE, label_data);

    // Test projections are grouped by class index, so a correct prediction equals i
    int predictions = 0;
    int total = 0;
    for (size_t i = 0; i < data->test_projections.size(); i++) {
        for (const auto &projection : data->test_projections[i]) {
            total++;
            int prediction = int(svm->predict(projection));
            if (prediction == int(i))
                predictions++;
        }
    }
    return double(predictions * 100) / total;
}
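// Svm::classify() assumes `data` already holds PCA projections of the training and test
// samples, with the test projections grouped by class index. A rough sketch of how such
// projections could be produced with cv::PCA; the names `images`, `num_components` and
// `project_images` are illustrative only and not part of the original code:
#include <opencv2/opencv.hpp>
#include <vector>

using cv::Mat;
using cv::PCA;

// Project every flattened image onto the leading principal components of the set.
std::vector<Mat> project_images(const std::vector<Mat> &images, int num_components)
{
    // Stack the flattened, float-converted images as rows of one data matrix
    // (assumes a non-empty vector of equally sized single-channel images)
    Mat samples(int(images.size()), int(images[0].total()), CV_32FC1);
    for (size_t i = 0; i < images.size(); i++) {
        Mat row;
        images[i].convertTo(row, CV_32FC1);
        row.reshape(1, 1).copyTo(samples.row(int(i)));
    }

    // Compute the PCA basis and project each sample onto it
    PCA pca(samples, Mat(), PCA::DATA_AS_ROW, num_components);
    std::vector<Mat> projections;
    for (int i = 0; i < samples.rows; i++)
        projections.push_back(pca.project(samples.row(i)));
    return projections;
}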
int main() {
    initialize();
    train_data();
    test_data();
}
void FNeuralNetLMBase::TrainLM(const string &validationfile,
                               const string &outbase,
                               bool nce_ppl) {
  // =============
  // Prepare for the training
  // Equivalent to ReadLM
  word_vocab_.ReadVocabFromTxt(word_vocab_filename_);
  if (word_vocab_.empty()) {
    cerr << "empty word vocabulary!" << endl;
    exit(EXIT_FAILURE);
  }
  factor_vocab_.ReadVocabFromTxt(factor_vocab_filename_);
  if (factor_vocab_.empty()) {
    cerr << "empty factor vocabulary!" << endl;
    exit(EXIT_FAILURE);
  }
  ReadDecompFromTxt(decomp_filename_);

  PrintParams();
  CheckParams();
  AllocateModel();
  InitializeNeuralNet();
  // ==== END ====

  // Read the data
  FNNLMDataReader train_data(train_filenames_, &word_vocab_, &factor_vocab_,
                             shuffle_datafiles_, shuffle_sentences_);
  vector<string> validation_filenames = { validationfile };
  FNNLMDataReader validation_data(validation_filenames, &word_vocab_, &factor_vocab_, false, false);

  // Set NCE sampling.
  if (nce_) {
    // TODO: flatten noise_distribution_?
    vector<int> word_count(word_vocab_.size(), 0);
    int num_word_tokens = 0;
    const size_t eos_widx = word_vocab().eos_idx();

    vector<int> factor_count(factor_vocab_.size(), 0);
    int num_factor_tokens = 0;
    const size_t eos_fidx = factor_vocab().eos_idx();

    // Collect word/factor unigram counts over the training data for the noise distribution
    vector<pair<size_t, vector<size_t>>> sentence;
    train_data.StartEpoch();
    while (train_data.GetSentence(sentence)) {
      for (vector<pair<size_t, vector<size_t>>>::const_iterator it = sentence.begin();
           it != sentence.end(); ++it) {
        word_count[it->first]++;
        num_word_tokens++;
        if (weight_factor_output_ > 0) {
          for (size_t p = 0; p < it->second.size(); p++) {
            factor_count[it->second[p]]++;
            num_factor_tokens++;
          }
        }
      }
      // Every sentence implicitly ends with the end-of-sentence token
      word_count[eos_widx]++;
      num_word_tokens++;
      if (weight_factor_output_ > 0) {
        factor_count[eos_fidx]++;
        num_factor_tokens++;
      }
    }

    word_noise_distribution_ = Distribution(word_count.begin(), word_count.end());
    word_noise_pdf_ = word_noise_distribution_.param().probabilities();
    if (weight_factor_output_ > 0) {
      factor_noise_distribution_ = Distribution(factor_count.begin(), factor_count.end());
      factor_noise_pdf_ = factor_noise_distribution_.param().probabilities();
    }
    NCECheckSampling();
    log_num_negative_samples_ = log(num_negative_samples_);
  }

  BatchSGDTrain(train_data, validation_data, outbase, nce_ppl);

  cout << "================================================================================" << endl;
  cout << "Log-likelihood (base e) on validation is: "
       << EvalLM(validation_data, false) << endl;
}
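// In TrainLM() above, `Distribution` is constructed directly from unigram counts and exposes
// param().probabilities(); it is presumably an alias along the lines of
// std::discrete_distribution. A standalone sketch of how such an NCE noise distribution could
// be built and sampled; the counts, seed, and sample count below are illustrative only:
#include <iostream>
#include <random>
#include <vector>

typedef std::discrete_distribution<size_t> Distribution;

int main() {
  // Unigram counts collected over the training corpus (one entry per vocabulary word)
  std::vector<int> word_count = { 120, 45, 7, 301, 18 };

  // discrete_distribution normalizes the counts into sampling probabilities
  Distribution word_noise_distribution(word_count.begin(), word_count.end());
  std::vector<double> word_noise_pdf = word_noise_distribution.param().probabilities();

  // Draw a few negative samples for NCE from the noise distribution
  std::mt19937 rng(12345);
  for (int k = 0; k < 5; k++)
    std::cout << "noise sample: " << word_noise_distribution(rng) << std::endl;

  // The noise probability of a sampled word index w is word_noise_pdf[w]
  std::cout << "P_noise(word 0) = " << word_noise_pdf[0] << std::endl;
  return 0;
}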