Example #1
CClassifier * train(EClassifierType clt, CCaptcha * captcha, unsigned train_vec, unsigned test_num)
{
	CClassifier * cl = NULL;

	try
	{
		throw_null((cl = new CClassifier(clt)));

		unsigned u, t, num, yes;
		Mat train_data(train_vec, dim, CV_32FC1);	// one feature row per training sample
		Mat responses(train_vec, 1, CV_32SC1);		// digit label for each row

		// Generate train_vec random digit captchas and vectorize each one.
		for(u = 0; u < train_vec; u++)
		{
			num = rand() % 10;

			Mat img = (* captcha)(num);
			Mat vc = vec(img);

			for(t = 0; t < dim; t++)
				train_data.at<float>(u, t) = vc.at<float>(0, t);

			responses.at<int>(u, 0) = num;
		}

		cl->train(train_data, responses);

		// Evaluate on test_num freshly generated captchas.
		for(u = 0, yes = 0; u < test_num; u++)
		{
			num = rand() % 10;

			Mat img = (* captcha)(num);
			Mat vc = vec(img);

			if(num == cl->predict(vc))
				yes++;
		}

		printf("Correctly classified vectors: %u out of %u (%f %%)\n", yes, test_num, yes * 100 / (double) test_num);
	}
	catch(...)
	{
		// On any failure, release the partially constructed classifier
		// and signal the error by returning NULL.
		if(cl != NULL)
			delete cl;

		cl = NULL;
	}

	return cl;
}
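
This example trains a classifier on randomly generated digit captchas (the vec helper and the dim global come from the surrounding project) and prints the accuracy over test_num fresh samples. A minimal driver might look like the sketch below; ECT_SVM and the default-constructed CCaptcha are assumptions, since the enum values and the captcha API are not shown here.

// Hypothetical driver for train() above; ECT_SVM and the default
// CCaptcha constructor are assumed, not taken from the shown source.
int main()
{
	srand(time(NULL));	// train() draws digits with rand()

	CCaptcha captcha;
	CClassifier * cl = train(ECT_SVM, &captcha, 1000, 200);

	if(cl == NULL)
		return 1;	// train() returns NULL on any failure

	delete cl;

	return 0;
}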
Example #2
File: svm.cpp Project: melias122/scratch
double Svm::classify()
{
	// Without training projections there is nothing to classify against.
	if (data->train_projections.empty()) {
		return -1.;
	}

	int train_size = int(data->train_projections.size());
	int components = data->eigenvalues.rows;

	Mat train_data(train_size, components, CV_32FC1);
	Mat label_data(train_size, 1, CV_32SC1);

	// One projection vector per row, with the matching class label.
	for (int i = 0; i < train_size; i++) {
		data->train_projections[i].row(0).copyTo(train_data.row(i));
		label_data.at<int>(i, 0) = data->labels[i];
	}

	// C-SVC with an RBF kernel and fixed hyperparameters.
	Ptr<ml::SVM> svm = ml::SVM::create();

	svm->setType(ml::SVM::C_SVC);
	svm->setGamma(0.0001);
	svm->setC(2000);
	svm->setKernel(ml::SVM::RBF);
	svm->train(train_data, ml::ROW_SAMPLE, label_data);

	int predictions = 0;
	int total = 0;

	// test_projections[i] holds the projections for class label i,
	// so a correct prediction must equal the outer index.
	for (size_t i = 0; i < data->test_projections.size(); i++) {
		for (const auto &projection : data->test_projections[i]) {
			total++;
			int prediction = int(svm->predict(projection));
			if (prediction == int(i))
				predictions++;
		}
	}

	return total > 0 ? double(predictions * 100) / total : -1.;
}
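
Since classify retrains the SVM from scratch on every call, persisting the trained model is a natural follow-up. A minimal sketch, assuming OpenCV 3.3+ (where ml::SVM::load is a static member); the file name and the sample layout are placeholders:

#include <opencv2/core.hpp>
#include <opencv2/ml.hpp>

using namespace cv;

// After training: svm->save("svm_model.yml");
float predict_from_disk(const Mat &sample)
{
	// sample must be 1 x components, CV_32FC1, like the projections above.
	Ptr<ml::SVM> svm = ml::SVM::load("svm_model.yml");
	return svm->predict(sample);
}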
Example #3
File: two_layer.cpp Project: vivekn/nnet
int main() {
	initialize();
	train_data();
	test_data();
}
Example #4
void FNeuralNetLMBase::TrainLM(const string &validationfile,
                               const string &outbase,
                               bool nce_ppl) {
  // =============
  // Prepare for the training
  // Equivalent to ReadLM
  word_vocab_.ReadVocabFromTxt(word_vocab_filename_);
  if (word_vocab_.empty()) {
    cerr << "empty word vocabulary!" << endl;
    exit(EXIT_FAILURE);
  }
  factor_vocab_.ReadVocabFromTxt(factor_vocab_filename_);
  if (factor_vocab_.empty()) {
    cerr << "empty factor vocabulary!" << endl;
    exit(EXIT_FAILURE);
  }
  ReadDecompFromTxt(decomp_filename_);

  PrintParams();
  CheckParams();
  AllocateModel();
  InitializeNeuralNet();
  // ==== END ====

  // Read the data
  FNNLMDataReader train_data(train_filenames_, &word_vocab_, &factor_vocab_,
                             shuffle_datafiles_, shuffle_sentences_);
  vector<string> validation_filenames = { validationfile };
  FNNLMDataReader validation_data(validation_filenames, &word_vocab_, &factor_vocab_, false, false);

  // Set NCE sampling.
  if (nce_) {
    // TODO: flatten noise_distribution_?
    vector<int> word_count(word_vocab_.size(), 0);
    int num_word_tokens = 0;
    const size_t eos_widx = word_vocab().eos_idx();
    vector<int> factor_count(factor_vocab_.size(), 0);
    int num_factor_tokens = 0;
    const size_t eos_fidx = factor_vocab().eos_idx();

    vector<pair<size_t, vector<size_t>>> sentence;

    // Accumulate unigram counts over one pass of the training data.
    train_data.StartEpoch();
    while (train_data.GetSentence(sentence)) {
      for (const auto &token : sentence) {
        word_count[token.first]++;
        num_word_tokens++;
        if (weight_factor_output_ > 0) {
          for (size_t p = 0; p < token.second.size(); p++) {
            factor_count[token.second[p]]++;
            num_factor_tokens++;
          }
        }
      }
      // Every sentence also contributes an implicit end-of-sentence token.
      word_count[eos_widx]++;
      num_word_tokens++;
      if (weight_factor_output_ > 0) {
        factor_count[eos_fidx]++;
        num_factor_tokens++;
      }
    }

    // Normalize the counts into the noise sampling distributions.
    word_noise_distribution_ = Distribution(word_count.begin(), word_count.end());
    word_noise_pdf_ = word_noise_distribution_.param().probabilities();
    if (weight_factor_output_ > 0) {
      factor_noise_distribution_ = Distribution(factor_count.begin(), factor_count.end());
      factor_noise_pdf_ = factor_noise_distribution_.param().probabilities();
    }
    NCECheckSampling();
    log_num_negative_samples_ = log(num_negative_samples_);
  }

  BatchSGDTrain(train_data, validation_data, outbase, nce_ppl);

  cout << "================================================================================" << endl;
  cout << "Log-likelihood (base e) on validation is: " \
      << EvalLM(validation_data, false) << endl;
}
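
The NCE block above feeds raw unigram counts into Distribution, whose param().probabilities() accessor matches std::discrete_distribution; presumably that is what the typedef wraps. A self-contained sketch of the same counting-to-sampling step, with toy counts invented for illustration:

#include <iostream>
#include <random>
#include <vector>

int main() {
  // Toy unigram counts; in TrainLM these come from one pass over the data.
  std::vector<int> word_count = {5, 1, 3, 1};

  // The constructor normalizes counts into probabilities: 0.5, 0.1, 0.3, 0.1.
  std::discrete_distribution<size_t> noise(word_count.begin(), word_count.end());
  for (double p : noise.param().probabilities())
    std::cout << p << ' ';
  std::cout << '\n';

  // Draw a few negative samples, as the NCE trainer would.
  std::mt19937 gen(42);
  for (int i = 0; i < 5; i++)
    std::cout << noise(gen) << ' ';
  std::cout << '\n';
}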