int StructuralStreamDriftDetector::FindUnstableCheckPointByDistributionChange() { if (check_points.size() == 1) { return -1; } // Accumulate all the check point's frequency tables, so we know all items // in the window. ItemMap<unsigned> items; for (const CheckPoint& checkPoint : check_points) { items.Add(checkPoint.frequency_table); } ItemMap<unsigned> lhs(items); ItemMap<unsigned> rhs; int32_t block_index = check_points.size() - 2; while (block_index > 0) { lhs.Remove(check_points[block_index + 1].frequency_table); rhs.Add(check_points[block_index + 1].frequency_table); auto begin = check_points.begin(); int32_t n_lhs = SizeOfWindow(begin, begin + block_index); int32_t n_rhs = SizeOfWindow(begin + block_index, check_points.end()); uint32_t n = n_lhs + n_rhs; ASSERT(n == SizeOfWindow(begin, check_points.end())); for (auto itr = items.GetIterator(); itr.HasNext(); itr.Next()) { Item item = itr.GetKey(); uint32_t u_lhs = lhs.Get(item); uint32_t u_rhs = rhs.Get(item); double v_lhs = Variance(u_lhs, n_lhs); double v_rhs = Variance(u_rhs, n_rhs); double v = Variance(items.Get(item), n); double absValue = u_lhs / n_lhs - u_rhs / n_rhs; const double mintMinWinLength = 5; // value copied from adWin.java double dd = log(2 * log(n) / dbdd_delta); double m = ((double)1 / ((n_rhs - mintMinWinLength + 1))) + ((double)1 / ((n_lhs - mintMinWinLength + 1))); double epsilon = sqrt(2 * m * v * dd) + (double)2 / 3 * dd * m; bool shouldCut = abs(absValue) > epsilon; if (shouldCut) { return block_index; } } block_index--; } return -1; }
// Exercises ItemMap basic set/get/contains behaviour, then builds an FPTree
// with the kSpoTree option over the Census2 data set and checks that the
// resulting tree reports itself as sorted.
TEST(FPTree, SpoTree) {
  {
    // Every freshly created item must be absent before Set() and present,
    // with the stored value, afterwards.
    ItemMap<unsigned> m;
    for (unsigned i = 1; i < 100; i++) {
      Item item(i);
      EXPECT_FALSE(m.Contains(item));
      m.Set(item, i);
      EXPECT_TRUE(m.Contains(item));
      EXPECT_EQ(m.Get(item), i);
    }
  }
  {
    Item::ResetBaseId();
    InvertedDataSetIndex index(Census2DataSetReader());
    Options options(0, kSpoTree, 0, 0, 0.15, 0, 0, 0, 0);
    FPTree* spotree = CreateFPTree(&index, options);
    // Fatal assertion: the tree is dereferenced below, so continuing with a
    // null pointer would crash the whole test binary instead of failing
    // this test.
    ASSERT_TRUE(spotree != nullptr);
    index.Load();
    string ts = spotree->ToString();
    cout << "Tree: " << ts << endl;
    //spotree->DumpToGraphViz("test-output/spotree-census2.dot");
    EXPECT_TRUE(spotree->IsSorted());
    /*
    printf("freq before sort:\n");
    spotree->DumpFreq();
    spotree->Sort();
    printf("\n\nfreq AFTER sort:\n");
    spotree->DumpFreq();
    //spotree->DumpToGraphViz("test-output/cptree-census2.sorted.dot");
    ts = spotree->ToString();
    cout << "Sorted Tree: " << ts << endl;
    ASSERT(spotree->IsSorted());*/
    delete spotree;
  }
}