// Classify every row of `samples` in parallel; each iteration calls the
// single-row predict() overload independently, so no synchronization is needed.
// OpenMP 3.0+ permits the unsigned loop counter.
Table<LabelType> predict(const TableView<DataType> &samples) {
    std::vector<LabelType> predictions(samples.rowsNumber());
    #pragma omp parallel for
    for (size_t i = 0; i < samples.rowsNumber(); ++i) {
        predictions[i] = predict(samples[i]);
    }
    // Move the predicted labels into the result table instead of copying them.
    return Table<LabelType>(baseLabels.columnsNames(),
                            std::make_move_iterator(predictions.begin()),
                            std::make_move_iterator(predictions.end()));
}
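The loop body delegates to a single-row predict() overload that the excerpt does not show. A minimal self-contained sketch of what such a tree walk could look like, assuming a flat array of nodes where each internal node tests one feature against a threshold; all names and types below are illustrative, not taken from the original code:

#include <cstddef>
#include <vector>

// Illustrative flat decision-tree node; the real Tree layout is not shown here.
struct Node {
    bool leaf;             // true if this node stores a final prediction
    std::size_t feature;   // column tested at an internal node
    double threshold;      // split value
    std::size_t left, right;
    int label;             // prediction stored at a leaf
};

int predictRow(const std::vector<Node> &nodes, std::size_t root,
               const std::vector<double> &row) {
    // Walk from the root down to a leaf, branching on one feature per level.
    std::size_t node = root;
    while (!nodes[node].leaf) {
        const Node &n = nodes[node];
        node = (row[n.feature] < n.threshold) ? n.left : n.right;
    }
    return nodes[node].label;
}

Because each row's walk touches only read-only node data, calls like this are safe to run from the parallel loop above without locking.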
// Information gain of a binary split: entropy of the combined sample minus the
// size-weighted entropies of the positive and negative partitions.
double operator()(const TableView<Label> &positive, const TableView<Label> &negative) {
    std::unordered_set<Label> labels;
    std::unordered_map<Label, size_t> positiveLabelCount;
    for (const auto &label : positive) {
        ++positiveLabelCount[label];
        labels.insert(label);
    }
    std::unordered_map<Label, size_t> negativeLabelCount;
    for (const auto &label : negative) {
        ++negativeLabelCount[label];
        labels.insert(label);
    }
    size_t samplesNumber = positive.rowsNumber() + negative.rowsNumber();
    double entropy = 0;
    double positivePart = positive.rowsNumber() * 1.0 / samplesNumber;
    double negativePart = negative.rowsNumber() * 1.0 / samplesNumber;
    for (const auto &label : labels) {
        size_t positiveClassSize = positiveLabelCount[label];
        size_t negativeClassSize = negativeLabelCount[label];
        size_t classSize = positiveClassSize + negativeClassSize;
        // Entropy term of the unsplit sample for this class...
        entropy += h(classSize * 1.0 / samplesNumber);
        // ...minus the weighted per-partition terms; the guards avoid a
        // division by zero when one side of the split is empty.
        if (positive.rowsNumber()) {
            entropy -= positivePart * h(positiveClassSize * 1.0 / positive.rowsNumber());
        }
        if (negative.rowsNumber()) {
            entropy -= negativePart * h(negativeClassSize * 1.0 / negative.rowsNumber());
        }
    }
    return entropy;
}
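The per-term helper h(p) is not defined in the excerpt. For the expression above to compute Shannon information gain, h must be the single-class entropy term -p * log(p) (base 2 by convention), with h(0) defined as 0 so that absent classes contribute nothing. A minimal sketch under that assumption:

#include <cmath>

// Assumed definition of h: one term of the Shannon entropy sum, -p * log2(p).
// h(0) = 0 matches the limit of -p*log2(p) as p -> 0 and keeps empty classes
// from contributing to the entropy.
double h(double p) {
    return p > 0 ? -p * std::log2(p) : 0.0;
}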
// Train on the full sample set: build an identity index vector so the trainer
// receives an indexed view of the data, then store the resulting tree.
void train(const TableView<Row> &samples, const TableView<Label> &labels) {
    std::vector<size_t> ind;
    ind.reserve(samples.rowsNumber());
    for (size_t i = 0; i < samples.rowsNumber(); ++i) {
        ind.push_back(i);
    }
    tree = std::unique_ptr<Tree>(new Tree(trainer.train(samples[ind], labels[ind])));
    // The trained tree must expose a valid root index into its node array.
    assert(tree->root < tree->size());
}
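Passing an identity index vector through samples[ind] looks redundant for a single tree, but the indexed-view pattern is exactly what makes subsampling cheap, e.g. drawing a bootstrap sample per tree if this trainer were reused for a forest. A self-contained sketch of drawing such indices; the function name is illustrative:

#include <cstddef>
#include <random>
#include <vector>

// Draw n row indices uniformly with replacement: a bootstrap sample that can be
// fed through the same samples[ind] / labels[ind] indexing used by train().
std::vector<std::size_t> bootstrapIndices(std::size_t n, std::mt19937 &rng) {
    std::uniform_int_distribution<std::size_t> pick(0, n - 1);
    std::vector<std::size_t> ind;
    ind.reserve(n);
    for (std::size_t i = 0; i < n; ++i) {
        ind.push_back(pick(rng));
    }
    return ind;
}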