Example #1
0
	// Batch prediction: applies the single-row predict() overload to every
	// sample row in parallel, then packs the results into a labelled Table
	// whose column names are taken from the training labels.
	Table<LabelType> predict(const TableView<DataType> &samples) {
		const size_t rows = samples.rowsNumber();
		std::vector<LabelType> result(rows);

		// Rows are independent, so the loop parallelizes trivially.
		#pragma omp parallel for
		for (size_t row = 0; row < rows; ++row) {
			result[row] = predict(samples[row]);
		}

		return Table<LabelType> (baseLabels.columnsNames(),
								 std::make_move_iterator(result.begin()),
								 std::make_move_iterator(result.end()));
	}
	// Scores a candidate split of the labelled samples into a `positive`
	// and a `negative` side: entropy of the combined labels minus the
	// size-weighted entropy of each side, computed per label via the
	// helper h(p) (defined elsewhere in this class).
	// Returns 0 when both sides are empty (no labels to iterate).
	double operator () (const TableView<Label> &positive, const TableView<Label> &negative) {
		std::unordered_set<Label> seen;

		// Tally how often each label occurs on one side of the split,
		// recording every distinct label in `seen` along the way.
		auto tally = [&seen](const TableView<Label> &side) {
			std::unordered_map<Label, size_t> counts;
			for (const auto &label : side) {
				++counts[label];
				seen.insert(label);
			}
			return counts;
		};

		std::unordered_map<Label, size_t> posCount = tally(positive);
		std::unordered_map<Label, size_t> negCount = tally(negative);

		const size_t total = positive.rowsNumber() + negative.rowsNumber();
		double posWeight = positive.rowsNumber() * 1.0 / total;
		double negWeight = negative.rowsNumber() * 1.0 / total;

		double score = 0;
		for (const auto &label : seen) {
			const size_t onPos = posCount[label];
			const size_t onNeg = negCount[label];

			// Whole-set term minus the weighted per-side terms; the guards
			// skip an empty side so we never divide by zero.
			score += h((onPos + onNeg) * 1.0 / total);
			if (positive.rowsNumber()) {
				score -= posWeight * h(onPos * 1.0 / positive.rowsNumber());
			}
			if (negative.rowsNumber()) {
				score -= negWeight * h(onNeg * 1.0 / negative.rowsNumber());
			}
		}
		return score;
	}
Example #3
0
	void train(const TableView<Row> &samples, const TableView<Label> &labels) {
		std::vector<size_t> ind;
		for(size_t i = 0; i < samples.rowsNumber(); ++i) {
            ind.push_back(i);
		}
		tree = std::unique_ptr<Tree>(new Tree(trainer.train(samples[ind], labels[ind])));
		assert(tree->root < tree->size());
	}