Esempio n. 1
0
void VariableWindowDataSet::Pop() {
  auto& transaction = Front();
  if (first_chunk_start_offset + 1 < chunk_size) {
    // Update the inverted index to clear the bits set corresponding to
    // the items in this transaction.
    auto offset = transaction.id - first_chunk_start_tid;
    ASSERT(offset == first_chunk_start_offset);
    ASSERT((offset / chunk_size) == 0); // Should be first chunk.
    auto bit_num = offset % chunk_size;

    for (uint32_t i = 0; i < transaction.items.size(); i++) {
      Item item = transaction.items[i];
      // Assert that the bit was actually set first!
      ASSERT(index[item.GetIndex()][0][bit_num] == true);
      index[item.GetIndex()][0].set(bit_num, false);
    }
    first_chunk_start_offset++;
  } else {
    // We're removing the last transaction in the first chunk/bitset. We
    // must drop every tidlist's first chunk.
    for (uint32_t i = 0; i < index.size(); i++) {
      TidList& tidlist = index[i];
      if (tidlist.size() > 0) {
        // Some tid lists can be empty if the corresponding item only appeared
        // in blocks that have already been purged.
        tidlist.erase(tidlist.begin(), tidlist.begin() + 1);
      }
    }
    first_chunk_start_offset = 0;
    first_chunk_start_tid += chunk_size;
  }
  transactions.pop_front();
}
Esempio n. 2
0
int VariableWindowDataSet::Count(const ItemSet& aItemSet) const {
  auto& items = aItemSet.mItems;

  // Find item with the shortest tidlist.
  auto itr = items.begin();
  Item smallest_item = *itr;
  // Note: We do not increment iterator, so we do the existence in the
  // loop check below.
  for (; itr != items.end(); itr++) {
    Item item = *itr;
    if (item.GetIndex() >= index.size()) {
      // Item does not exist in itemset, so it will have 0 count.
      return 0;
    }
    if (index[item.GetIndex()].size() < index[smallest_item.GetIndex()].size()) {
      smallest_item = item;
    }
  }

  // AND the shortest tidlist with all other item's tidlists, and count that.
  size_t count = 0;
  const TidList& shortest = index[smallest_item.GetIndex()];
  for (uint32_t i = 0; i < shortest.size(); i++) {
    bitset<chunk_size> b(shortest[i]);
    if (b.none()) {
      // No bits set in this chunk, no point iterating over other chunks, as
      // the result will be 0 when we AND with them.
      continue;
    }
    for (auto itr = items.begin(); itr != items.end(); itr++) {
      if (*itr == smallest_item) {
        continue;
      }
      const TidList& other = index[itr->GetIndex()];
      if (i < other.size()) {
        b &= other[i];
      }
    }
    count += b.count();
  }
  return (int)count;
}
Esempio n. 3
0
int VariableWindowDataSet::Count(const Item& aItem) const {
  auto item_idx = aItem.GetIndex();
  if (item_idx >= index.size()) {
    return 0;
  }
  const TidList& tidlist = index.at(item_idx);
  size_t count = 0;
  for (auto i = 0; i < tidlist.size(); i++) {
    const std::bitset<chunk_size>& b = tidlist[i];
    count += b.count();
  }
  return (int)count;
}