// Searches the window of check points, newest to oldest, for a split at which
// some item's relative frequency on the left differs from its relative
// frequency on the right by more than an ADWIN-style bound.
//
// Returns the block index of the first such split found, or -1 when there are
// fewer than two check points or no split exceeds the bound.
int
StructuralStreamDriftDetector::FindUnstableCheckPointByDistributionChange()
{
  // A split needs at least two check points; this also covers the empty case.
  if (check_points.size() < 2) {
    return -1;
  }

  // Accumulate all the check point's frequency tables, so we know all items
  // in the window.
  ItemMap<unsigned> items;
  for (const CheckPoint& checkPoint : check_points) {
    items.Add(checkPoint.frequency_table);
  }

  // lhs starts as the whole window; each iteration moves one check point's
  // counts from lhs to rhs.
  ItemMap<unsigned> lhs(items);
  ItemMap<unsigned> rhs;

  const double mintMinWinLength = 5; // value copied from adWin.java

  for (int32_t block_index = static_cast<int32_t>(check_points.size()) - 2;
       block_index > 0;
       block_index--) {

    // NOTE(review): the item counts are split after check_points[block_index]
    // (lhs keeps indices 0..block_index), whereas the window sizes below look
    // like they are split before it. Confirm against SizeOfWindow's range
    // semantics which of the two is intended.
    lhs.Remove(check_points[block_index + 1].frequency_table);
    rhs.Add(check_points[block_index + 1].frequency_table);

    auto begin = check_points.begin();
    int32_t n_lhs = SizeOfWindow(begin, begin + block_index);
    int32_t n_rhs = SizeOfWindow(begin + block_index, check_points.end());
    uint32_t n = n_lhs + n_rhs;
    ASSERT(n == SizeOfWindow(begin, check_points.end()));

    // The bound below divides by (size - mintMinWinLength + 1). A side shorter
    // than mintMinWinLength makes that term zero or negative, so the bound is
    // meaningless; skip such splits.
    if (n_lhs < mintMinWinLength || n_rhs < mintMinWinLength) {
      continue;
    }

    // These depend only on the window sizes, not on the item being tested.
    const double dd = log(2 * log(n) / dbdd_delta);
    const double m = ((double)1 / ((n_rhs - mintMinWinLength + 1))) +
                     ((double)1 / ((n_lhs - mintMinWinLength + 1)));

    for (auto itr = items.GetIterator(); itr.HasNext(); itr.Next()) {
      Item item = itr.GetKey();
      const uint32_t u_lhs = lhs.Get(item);
      const uint32_t u_rhs = rhs.Get(item);
      const double v = Variance(items.Get(item), n);

      // Relative frequencies must be computed in floating point: integer
      // division would truncate them and the unsigned subtraction would wrap
      // when the right-hand frequency is the larger one.
      const double freq_diff = static_cast<double>(u_lhs) / n_lhs -
                               static_cast<double>(u_rhs) / n_rhs;
      const double epsilon = sqrt(2 * m * v * dd) + (double)2 / 3 * dd * m;

      // fabs rather than abs, so the double is never routed through the
      // integer overload.
      if (fabs(freq_diff) > epsilon) {
        return block_index;
      }
    }
  }
  return -1;
}
// Example #2
// 0
// Exercises ItemMap set/get/contains, then builds a kSpoTree FPTree through
// an InvertedDataSetIndex and checks that the tree reports itself as sorted
// after the index has been loaded.
TEST(FPTree, SpoTree) {
    {
        // Each item must be absent before Set() and present, with the stored
        // value, afterwards.
        ItemMap<unsigned> m;
        for (unsigned i = 1; i < 100; i++) {
            Item item(i);
            EXPECT_FALSE(m.Contains(item));
            m.Set(item, i);
            EXPECT_TRUE(m.Contains(item));
            EXPECT_EQ(i, m.Get(item));
        }
    }
    {
        Item::ResetBaseId();

        InvertedDataSetIndex index(Census2DataSetReader());
        Options options(0, kSpoTree, 0, 0, 0.15, 0, 0, 0, 0);
        FPTree* spotree = CreateFPTree(&index, options);
        // Fatal assertion: every statement below dereferences spotree, so a
        // failed creation must stop the test instead of crashing it.
        ASSERT_TRUE(spotree != nullptr);
        index.Load();

        string ts = spotree->ToString();
        cout << "Tree: " << ts << endl;
        //spotree->DumpToGraphViz("test-output/spotree-census2.dot");

        EXPECT_TRUE(spotree->IsSorted());
        /*
        printf("freq before sort:\n");
        spotree->DumpFreq();
        spotree->Sort();

        printf("\n\nfreq AFTER sort:\n");
        spotree->DumpFreq();

        //spotree->DumpToGraphViz("test-output/cptree-census2.sorted.dot");
        ts = spotree->ToString();
        cout << "Sorted Tree: " << ts << endl;
        ASSERT(spotree->IsSorted());*/

        delete spotree;
    }
}