/// Computes a distance between a new sample and a pattern sample.
///
/// The distance is the sum of `relativeError(...)` over every field of
/// `newSample` whose key is not "name", each compared against the field with
/// the same key in `patternSample`.
///
/// @param newSample      sample whose fields drive the comparison
/// @param patternSample  sample the fields are compared against
/// @return the accumulated relative error, or 10000 if either sample is null
double PropertiesComparator::getDistance(jsonextend newSample, jsonextend patternSample) {
    // TODO: not all properties are implemented
    // TODO: too much copy-paste!

    // Sentinel distance reported when either sample is null.
    const double nullSampleDistance = 10000;

    if (newSample.is_null() || patternSample.is_null()) {
        return nullSampleDistance;
    }

    double total = 0.0;
    const auto& fields = newSample.as_object();
    for (auto field = fields.cbegin(); field != fields.cend(); ++field) {
        // The "name" field is excluded from the comparison.
        if (field->first != "name") {
            const double observed = field->second.as_double();
            const double expected = patternSample[field->first].as_double();
            total += relativeError(observed, expected);
        }
    }
    return total;
}
/// Compares a reference set of (distance, index) results against a test set
/// and reports how far they diverge.
///
/// Both sets hold `dim1 * dim2` entries, addressed as (query, result) pairs
/// through `lookup(ptr, query, result, dim1, dim2)`.
/// NOTE(review): the memory layout is defined by `lookup`, which is not
/// visible here; confirm against that helper.
///
/// For every test entry the function determines at which result position the
/// same index appears in the reference list of the same query and classifies
/// the positional difference (0, 1, >1, or not found). It also tracks the
/// maximum absolute and relative distance error.
///
/// @param refDist          reference distances
/// @param refInd           reference indices
/// @param testDist         distances under test
/// @param testInd          indices under test
/// @param dim1             number of queries
/// @param dim2             number of results per query
/// @param configMsg        message attached to assertion failures and printed
///                         as the "Config" header when non-empty
/// @param printBasicStats  print the summary statistics to stdout
/// @param printDiffs       print every (query, result) whose index differs
/// @param assertOnErr      raise gtest expectations on the checks below
/// @param maxRelativeError upper bound for the per-entry relative distance
///                         error
/// @param pctMaxDiff1      maximum allowed fraction of results whose position
///                         differs at all (by 1, by >1, or not found)
/// @param pctMaxDiffN      maximum allowed fraction of results whose position
///                         differs by more than 1
void compareLists(
        const float* refDist,
        const faiss::Index::idx_t* refInd,
        const float* testDist,
        const faiss::Index::idx_t* testInd,
        int dim1,
        int dim2,
        const std::string& configMsg,
        bool printBasicStats,
        bool printDiffs,
        bool assertOnErr,
        float maxRelativeError,
        float pctMaxDiff1,
        float pctMaxDiffN) {
    const int numResults = dim1 * dim2;

    float maxAbsErr = 0.0f;
    for (int i = 0; i < numResults; ++i) {
        maxAbsErr = std::max(maxAbsErr, std::abs(refDist[i] - testDist[i]));
    }

    // query -> {index -> result position}
    std::vector<std::unordered_map<faiss::Index::idx_t, int>> refIndexMap;
    for (int query = 0; query < dim1; ++query) {
        std::unordered_map<faiss::Index::idx_t, int> indices;
        for (int result = 0; result < dim2; ++result) {
            indices[lookup(refInd, query, result, dim1, dim2)] = result;
        }
        refIndexMap.emplace_back(std::move(indices));
    }

    // See how far off the indices are.
    // Keep track of the difference for each entry; -1 marks "not found".
    std::vector<std::vector<int>> indexDiffs;

    int diff1 = 0;   // index differs by 1
    int diffN = 0;   // index differs by >1
    int diffInf = 0; // index not found in the other
    int nonUniqueIndices = 0;

    double avgDiff = 0.0;
    int maxDiff = 0;
    float maxRelErr = 0.0f;

    for (int query = 0; query < dim1; ++query) {
        std::vector<int> diffs;
        std::set<faiss::Index::idx_t> uniqueIndices;
        const auto& indices = refIndexMap[query];

        for (int result = 0; result < dim2; ++result) {
            const auto t = lookup(testInd, query, result, dim1, dim2);

            // All indices reported within a query should be unique; this is
            // a serious error if it is otherwise the case. `insert` reports
            // whether the index was new, so one call both tests and records.
            const bool uniqueIndex = uniqueIndices.insert(t).second;
            if (assertOnErr) {
                EXPECT_TRUE(uniqueIndex)
                        << configMsg << " " << query << " " << result << " "
                        << t;
            }
            if (!uniqueIndex) {
                ++nonUniqueIndices;
            }

            const auto it = indices.find(t);
            if (it != indices.end()) {
                const int diff = std::abs(result - it->second);
                diffs.push_back(diff);

                if (diff == 1) {
                    ++diff1;
                } else if (diff > 1) {
                    ++diffN;
                }
                // diff is never negative, so a diff of 0 leaves maxDiff as is.
                maxDiff = std::max(diff, maxDiff);

                avgDiff += static_cast<double>(diff);
            } else {
                ++diffInf;
                diffs.push_back(-1);
                // don't count this for maxDiff
            }

            const auto refD = lookup(refDist, query, result, dim1, dim2);
            const auto testD = lookup(testDist, query, result, dim1, dim2);

            const float relErr = relativeError(refD, testD);
            if (assertOnErr) {
                EXPECT_LE(relErr, maxRelativeError)
                        << configMsg << " (" << query << ", " << result
                        << ") refD: " << refD << " testD: " << testD;
            }
            maxRelErr = std::max(maxRelErr, relErr);
        }

        indexDiffs.emplace_back(std::move(diffs));
    }

    if (assertOnErr) {
        EXPECT_LE(
                static_cast<float>(diff1 + diffN + diffInf),
                static_cast<float>(numResults) * pctMaxDiff1)
                << configMsg;

        // Don't count diffInf because that could be diff1 as far as we
        // know
        EXPECT_LE(
                static_cast<float>(diffN),
                static_cast<float>(numResults) * pctMaxDiffN)
                << configMsg;
    }

    // With no results at all, report 0 rather than dividing by zero (NaN).
    if (numResults > 0) {
        avgDiff /= static_cast<double>(numResults);
    }
    const auto pctOfResults = [numResults](int count) -> float {
        if (numResults <= 0) {
            return 0.0f;
        }
        return 100.0f * static_cast<float>(count) /
                static_cast<float>(numResults);
    };

    if (printBasicStats) {
        if (!configMsg.empty()) {
            printf("Config\n"
                   "----------------------------\n"
                   "%s\n",
                   configMsg.c_str());
        }

        printf("Result error and differences\n"
               "----------------------------\n"
               "max abs diff %.7f rel diff %.7f\n"
               "idx diff avg: %.5g max: %d\n"
               "idx diff of 1: %d (%.3f%% of queries)\n"
               "idx diff of >1: %d (%.3f%% of queries)\n"
               "idx diff not found: %d (%.3f%% of queries)"
               " [typically a last element inversion]\n"
               "non-unique indices: %d (a serious error if >0)\n",
               maxAbsErr,
               maxRelErr,
               avgDiff,
               maxDiff,
               diff1,
               pctOfResults(diff1),
               diffN,
               pctOfResults(diffN),
               diffInf,
               pctOfResults(diffInf),
               nonUniqueIndices);
    }

    if (printDiffs) {
        printf("differences:\n");
        printf("==================\n");
        for (int query = 0; query < dim1; ++query) {
            for (int result = 0; result < dim2; ++result) {
                // Widen to long long so the index prints correctly even where
                // `long` is narrower than the index type.
                const long long refI = static_cast<long long>(
                        lookup(refInd, query, result, dim1, dim2));
                const long long testI = static_cast<long long>(
                        lookup(testInd, query, result, dim1, dim2));

                if (refI == testI) {
                    continue;
                }

                const float refD = lookup(refDist, query, result, dim1, dim2);
                const float testD =
                        lookup(testDist, query, result, dim1, dim2);

                if (refD == testD) {
                    printf("(%d, %d [%d]) (ref %lld tst %lld dist ==)\n",
                           query,
                           result,
                           indexDiffs[query][result],
                           refI,
                           testI);
                } else {
                    // Only computed here, where it is actually reported.
                    const float maxDist = std::max(refD, testD);
                    const float delta = std::abs(refD - testD);
                    const float relErr = delta / maxDist;

                    printf("(%d, %d [%d]) (ref %lld tst %lld abs %.8f "
                           "rel %.8f ref %a tst %a)\n",
                           query,
                           result,
                           indexDiffs[query][result],
                           refI,
                           testI,
                           delta,
                           relErr,
                           refD,
                           testD);
                }
            }
        }
    }
}