void FScore::calculate_from_confusion_matrix(const MatrixXi &cmat) { // Sometimes a class is never predicted, leading to a sum that is 0. // When dividing later, there can be a nan. Avoid setting the value to 1. // Setting to 1 is correct because the value to divide will be 0. VectorXi sums = cmat.colwise().sum().unaryExpr(std::ptr_fun(avoid_zero)); recall_ = cmat.cast<double>().diagonal().array() / sums.cast<double>().array(); sums = cmat.rowwise().sum().unaryExpr(std::ptr_fun(avoid_zero)); precision_ = cmat.cast<double>().diagonal().array() / sums.cast<double>().array(); // Apply the same fix to the sum of the recall and precision arrays. VectorXd s = (recall_ + precision_).unaryExpr(std::ptr_fun(avoid_zero_double)); fscore_ = 2 * recall_.array() * precision_.array() / s.array(); }
double nominal_gini_gain(const VectorXi &values, const VectorXi &classes) { MatrixXi T = get_cross_table(values, classes); // total_per_value(k) is the number of times that value k appears MatrixXi total_per_value = T.rowwise().sum(); VectorXd probability_of_value = total_per_value.cast<double>() /values.rows(); // Fraction of each class per value MatrixXd fractions_yx = divide_colwise( T.cast<double>(), total_per_value.cast<double>()); // Gini impurity for each value: sum fractions^2 over the rows VectorXd G = fractions_yx.array().square().rowwise().sum(); double total_gini = 1 - probability_of_value.transpose() * G; VectorXi counts = T.colwise().sum(); double gain = gini(counts.cast<double>()) - total_gini; return gain; }
double nominal_entropy_gain(const VectorXi &values, const VectorXi &classes) { MatrixXi T = get_cross_table(values, classes); MatrixXi total_per_value = T.rowwise().sum(); VectorXd probability_of_value = total_per_value.cast<double>() /values.rows(); MatrixXd fractions_yx = divide_colwise( T.cast<double>(), total_per_value.cast<double>()); double epsilon = std::numeric_limits<double>::epsilon(); double invlog = 1 / std::log(2); MatrixXd H = (- invlog) * fractions_yx.array() * (fractions_yx.array() + epsilon).log().array(); double total_entropy = probability_of_value.transpose() * H.rowwise().sum(); VectorXi counts = T.colwise().sum(); double gain = entropy(counts.cast<double>()) - total_entropy; return gain; }
/**
 * Demo of row-wise and column-wise reversal of an Eigen matrix.
 *
 * Prints a random 3x4 integer matrix and both of its reversed views, reads
 * coefficient (1,0) of the row-wise reverse, overwrites that coefficient
 * with 4 and prints the matrix again.
 *
 * @return Always 0.
 */
int main(int, char**) {
  cout.precision(3);

  MatrixXi m = MatrixXi::Random(3,4);
  cout << "Here is the matrix m:" << endl << m << endl;
  cout << "Here is the rowwise reverse of m:" << endl
       << m.rowwise().reverse() << endl;
  cout << "Here is the colwise reverse of m:" << endl
       << m.colwise().reverse() << endl;

  cout << "Here is the coefficient (1,0) in the rowise reverse of m:" << endl
       << m.rowwise().reverse()(1,0) << endl;

  cout << "Let us overwrite this coefficient with the value 4." << endl;
  // The assignment used to be commented out, so the matrix printed below was
  // unchanged despite the message above. Coefficient (1,0) of the row-wise
  // reverse is the last entry of row 1 of m, so write to that entry directly.
  m(1, m.cols() - 1) = 4;
  cout << "Now the matrix m is:" << endl << m << endl;

  return 0;
}