/**
 * Computes an area figure from the true-positive and false-positive columns
 * of a threshold-curve data set (the function name indicates this is the
 * area under the ROC curve).
 *
 * For every row the drop in the true-positive column to the next row is
 * multiplied by the false-positive drop accumulated so far plus half of the
 * current false-positive drop; the final row contributes its own raw values.
 * The sum is then divided by the product of the first row's false-positive
 * and true-positive values.
 *
 * @param tcurve data set whose relation name must equal RELATION_NAME
 * @return the normalised area, or NaN when the relation name does not match
 *         or the data set has no rows
 */
double ThresholdCurve::getROCArea(Instances &tcurve) {
  const int rowCount = tcurve.numInstances();
  if (RELATION_NAME != tcurve.getRelationName()) {
    return std::numeric_limits<double>::quiet_NaN();
  }
  if (rowCount == 0) {
    return std::numeric_limits<double>::quiet_NaN();
  }

  const int truePosColumn = tcurve.attribute(TRUE_POS_NAME).index();
  const int falsePosColumn = tcurve.attribute(FALSE_POS_NAME).index();
  const double_array truePos = tcurve.attributeToDoubleArray(truePosColumn);
  const double_array falsePos = tcurve.attributeToDoubleArray(falsePosColumn);

  const int lastRow = rowCount - 1;
  double weightedSum = 0.0;
  double negativesSoFar = 0.0;
  for (int row = 0; row <= lastRow; ++row) {
    // Interior rows use the difference to the following row; the last row
    // has no successor and contributes whatever counts remain in it.
    const bool isLastRow = (row == lastRow);
    const double posStep =
        isLastRow ? truePos[lastRow] : truePos[row] - truePos[row + 1];
    const double negStep =
        isLastRow ? falsePos[lastRow] : falsePos[row] - falsePos[row + 1];

    weightedSum += posStep * (negativesSoFar + (0.5 * negStep));
    negativesSoFar += negStep;
  }

  // The first row's values serve as the normalising totals.
  return weightedSum / (falsePos[0] * truePos[0]);
}
/**
 * Averages precision over n recall levels spaced evenly across [0, 1]
 * (0, 1/(n-1), 2/(n-1), ..., 1).
 *
 * For each recall level the closest curve point is located with
 * binarySearch over the recall column in sorted order. Unless that point is
 * the first or last entry of the sorted order, the precision is linearly
 * interpolated between it and the next point that has a different recall.
 *
 * @param tcurve data set whose relation name must equal RELATION_NAME
 * @param n      number of recall levels to sample; at least 2 are needed so
 *               that both end points (recall 0 and recall 1) are covered
 * @return the mean of the n sampled precision values, or NaN when the
 *         relation name does not match, the data set has no rows, or n < 2
 */
double ThresholdCurve::getNPointPrecision(Instances &tcurve, const int n) {
  if (RELATION_NAME != tcurve.getRelationName() || (tcurve.numInstances() == 0)) {
    return std::numeric_limits<double>::quiet_NaN();
  }
  // With n == 1 the step size 1 / (n - 1) is a division by zero and the
  // search target becomes NaN; with n <= 0 there is nothing to average.
  // Report "not computable" explicitly instead of relying on that arithmetic.
  if (n < 2) {
    return std::numeric_limits<double>::quiet_NaN();
  }

  const int recallInd = tcurve.attribute(RECALL_NAME).index();
  const int precisInd = tcurve.attribute(PRECISION_NAME).index();
  double_array recallVals = tcurve.attributeToDoubleArray(recallInd);
  int_array sorted = Utils::Sort(recallVals);

  // Signed copy of the last valid position so the loop condition below does
  // not mix signed and unsigned operands.
  const int lastPos = static_cast<int>(sorted.size()) - 1;

  const double isize = 1.0 / (n - 1);
  double psum = 0;
  for (int i = 0; i < n; i++) {
    const double target = i * isize;
    int pos = binarySearch(sorted, recallVals, target);
    const double recall = recallVals[sorted[pos]];
    double precis = tcurve.instance(sorted[pos]).value(precisInd);

    // Interpolate figures for non-endpoints: walk forward to the next point
    // with a different recall and evaluate the connecting line at the target.
    while ((pos != 0) && (pos < lastPos)) {
      pos++;
      const double recall2 = recallVals[sorted[pos]];
      if (recall2 != recall) {
        const double precis2 = tcurve.instance(sorted[pos]).value(precisInd);
        const double slope = (precis2 - precis) / (recall2 - recall);
        const double offset = precis - recall * slope;
        precis = target * slope + offset;
        break;
      }
    }
    psum += precis;
  }
  return psum / n;
}
/**
 * Looks up the position of a threshold value within a threshold-curve data
 * set.
 *
 * The values of the last attribute are sorted and the requested threshold is
 * searched for with binarySearch; that search result is returned unchanged.
 * NOTE(review): the result looks like a position in the sorted order rather
 * than a row index mapped back through the sort permutation — confirm
 * against binarySearch and the callers.
 *
 * @param tcurve    data set whose relation name must equal RELATION_NAME
 * @param threshold value to look up; must not be below 0 or above 1.0
 * @return -1 when the relation name does not match, the data set has no
 *         rows, or the threshold is below 0 or above 1.0; 0 when the data
 *         set has exactly one row; otherwise the binarySearch result
 */
int ThresholdCurve::getThresholdInstance(Instances &tcurve, const double threshold) {
  if (RELATION_NAME != tcurve.getRelationName()) {
    return -1;
  }

  const int rowCount = tcurve.numInstances();
  if (rowCount == 0) {
    return -1;
  }
  if ((threshold < 0) || (threshold > 1.0)) {
    return -1;
  }

  // A single row is the only possible answer; no search needed.
  if (rowCount == 1) {
    return 0;
  }

  const int lastAttribute = tcurve.numAttributes() - 1;
  double_array thresholdValues = tcurve.attributeToDoubleArray(lastAttribute);
  int_array order = Utils::Sort(thresholdValues);
  return binarySearch(order, thresholdValues, threshold);
}