void VariableWindowDataSet::Pop() { auto& transaction = Front(); if (first_chunk_start_offset + 1 < chunk_size) { // Update the inverted index to clear the bits set corresponding to // the items in this transaction. auto offset = transaction.id - first_chunk_start_tid; ASSERT(offset == first_chunk_start_offset); ASSERT((offset / chunk_size) == 0); // Should be first chunk. auto bit_num = offset % chunk_size; for (uint32_t i = 0; i < transaction.items.size(); i++) { Item item = transaction.items[i]; // Assert that the bit was actually set first! ASSERT(index[item.GetIndex()][0][bit_num] == true); index[item.GetIndex()][0].set(bit_num, false); } first_chunk_start_offset++; } else { // We're removing the last transaction in the first chunk/bitset. We // must drop every tidlist's first chunk. for (uint32_t i = 0; i < index.size(); i++) { TidList& tidlist = index[i]; if (tidlist.size() > 0) { // Some tid lists can be empty if the corresponding item only appeared // in blocks that have already been purged. tidlist.erase(tidlist.begin(), tidlist.begin() + 1); } } first_chunk_start_offset = 0; first_chunk_start_tid += chunk_size; } transactions.pop_front(); }
int VariableWindowDataSet::Count(const ItemSet& aItemSet) const { auto& items = aItemSet.mItems; // Find item with the shortest tidlist. auto itr = items.begin(); Item smallest_item = *itr; // Note: We do not increment iterator, so we do the existence in the // loop check below. for (; itr != items.end(); itr++) { Item item = *itr; if (item.GetIndex() >= index.size()) { // Item does not exist in itemset, so it will have 0 count. return 0; } if (index[item.GetIndex()].size() < index[smallest_item.GetIndex()].size()) { smallest_item = item; } } // AND the shortest tidlist with all other item's tidlists, and count that. size_t count = 0; const TidList& shortest = index[smallest_item.GetIndex()]; for (uint32_t i = 0; i < shortest.size(); i++) { bitset<chunk_size> b(shortest[i]); if (b.none()) { // No bits set in this chunk, no point iterating over other chunks, as // the result will be 0 when we AND with them. continue; } for (auto itr = items.begin(); itr != items.end(); itr++) { if (*itr == smallest_item) { continue; } const TidList& other = index[itr->GetIndex()]; if (i < other.size()) { b &= other[i]; } } count += b.count(); } return (int)count; }
int VariableWindowDataSet::Count(const Item& aItem) const { auto item_idx = aItem.GetIndex(); if (item_idx >= index.size()) { return 0; } const TidList& tidlist = index.at(item_idx); size_t count = 0; for (auto i = 0; i < tidlist.size(); i++) { const std::bitset<chunk_size>& b = tidlist[i]; count += b.count(); } return (int)count; }