void FrequentPatternGroup::frequentPatternGroup(std::vector<AdjacenceListsGRAPH> & queryGraphVector, std::vector<std::vector<int>> & similiarQueryGroups) { std::map<string, vector<int>> TLSInvertedIndex; std::vector<Transaction> transactions; for (std::vector<AdjacenceListsGRAPH>::iterator queryIterator = queryGraphVector.begin(); queryIterator != queryGraphVector.end(); queryIterator++) { // each query is treated as a transaction transactions.push_back(std::vector<string>()); Transaction & newTransaction = transactions[transactions.size() - 1]; for (std::map<TLSequence, std::vector<std::vector<int>>>::iterator tlsSeqIterator = queryIterator->getTLSequenceMap()->begin(); tlsSeqIterator != queryIterator->getTLSequenceMap()->end(); tlsSeqIterator++) { string tlsQuanti = std::to_string(tlsSeqIterator->first.start); tlsQuanti += "_"; tlsQuanti += std::to_string(tlsSeqIterator->first.pivot); tlsQuanti += "_"; tlsQuanti += std::to_string(tlsSeqIterator->first.end); tlsQuanti += "_"; for (int i = 0; i < tlsSeqIterator->second.size(); i++) { string tlsIndexKey = tlsQuanti + std::to_string(i); std::map<string, vector<int>>::iterator invertexIndexIter = TLSInvertedIndex.find(tlsIndexKey); if (invertexIndexIter == TLSInvertedIndex.end()) { vector<int> tlsIndexValue; tlsIndexValue.push_back(queryIterator->graphId); TLSInvertedIndex.insert(std::pair<string, vector<int>>(tlsIndexKey, tlsIndexValue)); } else { invertexIndexIter->second.push_back(queryIterator->graphId); } // we use a format i.e. "start_pivot_end_1" to distinuish each tls, thus we can can consider the number of the same tls of each query graph newTransaction.push_back(tlsIndexKey); } } } const unsigned minimum_support_treshold = 2; const FPTree fptree{ transactions, minimum_support_treshold }; std::set<Pattern> patterns = fptree_growth(fptree); cout << "Frequent TLS Size: " << patterns.size() << endl; for (std::set<Pattern>::iterator patternIterator = patterns.begin(); patternIterator != patterns.end(); patternIterator++) { cout << patternIterator->second << " "; for (std::set<Item>::iterator itemIterator = patternIterator->first.begin(); itemIterator != patternIterator->first.end(); itemIterator++) { cout<<(*itemIterator) << " "; } cout << endl; } }
std::set<Pattern> fptree_growth(const FPTree& fptree) { if ( fptree.empty() ) { return std::set<Pattern>{}; } if ( contains_single_path( fptree ) ) { // generate all possible combinations of the items in the tree std::set<Pattern> single_path_patterns; // for each node in the tree assert( fptree.root->children.size() == 1 ); std::shared_ptr<FPNode> curr_fpnode = fptree.root->children.front(); while ( curr_fpnode ) { const Item& curr_fpnode_item = curr_fpnode->item; const unsigned curr_fpnode_frequency = curr_fpnode->frequency; // add a pattern formed only by the item of the current node Pattern new_pattern = { { curr_fpnode_item }, curr_fpnode_frequency }; single_path_patterns.insert( new_pattern ); // create a new pattern by adding the item of the current node to each pattern generated until now for ( const Pattern& pattern : single_path_patterns ) { Pattern new_pattern{ pattern }; new_pattern.first.insert( curr_fpnode_item ); assert( curr_fpnode_frequency <= pattern.second ); new_pattern.second = curr_fpnode_frequency; single_path_patterns.insert( new_pattern ); } // advance to the next node until the end of the tree assert( curr_fpnode->children.size() <= 1 ); if ( curr_fpnode->children.size() == 1 ) { curr_fpnode = curr_fpnode->children.front(); } else { curr_fpnode = nullptr; } } return single_path_patterns; } else { // generate conditional fptrees for each different item in the fptree, then join the results std::set<Pattern> multi_path_patterns; // for each item in the fptree for ( const auto& pair : fptree.header_table ) { const Item& curr_item = pair.first; // build the conditional fptree relative to the current item // start by generating the conditional pattern base std::vector<TransformedPrefixPath> conditional_pattern_base; // for each path in the header_table (relative to the current item) std::shared_ptr<FPNode> path_starting_fpnode = pair.second; while ( path_starting_fpnode ) { // construct the transformed prefix path // each item in th transformed prefix path has the same frequency (the frequency of path_starting_fpnode) const unsigned path_starting_fpnode_frequency = path_starting_fpnode->frequency; std::shared_ptr<FPNode> curr_path_fpnode = path_starting_fpnode->parent; // check if curr_path_fpnode is already the root of the fptree if ( curr_path_fpnode->parent ) { // the path has at least one node (excluding the starting node and the root) TransformedPrefixPath transformed_prefix_path{ {}, path_starting_fpnode_frequency }; while ( curr_path_fpnode->parent ) { assert( curr_path_fpnode->frequency >= path_starting_fpnode_frequency ); transformed_prefix_path.first.push_back( curr_path_fpnode->item ); // advance to the next node in the path curr_path_fpnode = curr_path_fpnode->parent; } conditional_pattern_base.push_back( transformed_prefix_path ); } // advance to the next path path_starting_fpnode = path_starting_fpnode->node_link; } // generate the transactions that represent the conditional pattern base std::vector<Transaction> conditional_fptree_transactions; for ( const TransformedPrefixPath& transformed_prefix_path : conditional_pattern_base ) { const std::vector<Item>& transformed_prefix_path_items = transformed_prefix_path.first; const unsigned transformed_prefix_path_items_frequency = transformed_prefix_path.second; Transaction transaction; for ( const Item& item : transformed_prefix_path_items ) { transaction.push_back( item ); } // add the same transaction transformed_prefix_path_items_frequency times for ( int i = 0; i < transformed_prefix_path_items_frequency; ++i ) { conditional_fptree_transactions.push_back( transaction ); } } // build the conditional fptree relative to the current item with the transactions just generated const FPTree conditional_fptree( conditional_fptree_transactions, fptree.minimum_support_treshold ); // call recursively fptree_growth on the conditional fptree (empty fptree: no patterns) std::set<Pattern> conditional_patterns = fptree_growth( conditional_fptree ); // construct patterns relative to the current item using both the current item and the conditional patterns std::set<Pattern> curr_item_patterns; // the first pattern is made only by the current item // compute the frequency of this pattern by summing the frequency of the nodes which have the same item (follow the node links) unsigned curr_item_frequency = 0; std::shared_ptr<FPNode> fpnode = pair.second; while ( fpnode ) { curr_item_frequency += fpnode->frequency; fpnode = fpnode->node_link; } // add the pattern as a result Pattern pattern{ { curr_item }, curr_item_frequency }; curr_item_patterns.insert( pattern ); // the next patterns are generated by adding the current item to each conditional pattern for ( const Pattern& pattern : conditional_patterns ) { Pattern new_pattern{ pattern }; new_pattern.first.insert( curr_item ); assert( curr_item_frequency >= pattern.second ); new_pattern.second = pattern.second; curr_item_patterns.insert( { new_pattern } ); } // join the patterns generated by the current item with all the other items of the fptree multi_path_patterns.insert( curr_item_patterns.cbegin(), curr_item_patterns.cend() ); } return multi_path_patterns; } }