Esempio n. 1
0
void FrequentPatternGroup::frequentPatternGroup(std::vector<AdjacenceListsGRAPH> & queryGraphVector, std::vector<std::vector<int>> & similiarQueryGroups)
{

	std::map<string, vector<int>> TLSInvertedIndex;
	std::vector<Transaction> transactions;

	for (std::vector<AdjacenceListsGRAPH>::iterator queryIterator = queryGraphVector.begin(); queryIterator != queryGraphVector.end(); queryIterator++) {
		// each query is treated as a transaction
		transactions.push_back(std::vector<string>());
		Transaction & newTransaction = transactions[transactions.size() - 1];

		for (std::map<TLSequence, std::vector<std::vector<int>>>::iterator tlsSeqIterator = queryIterator->getTLSequenceMap()->begin(); tlsSeqIterator != queryIterator->getTLSequenceMap()->end(); tlsSeqIterator++) {
			string tlsQuanti = std::to_string(tlsSeqIterator->first.start);
			tlsQuanti += "_";
			tlsQuanti += std::to_string(tlsSeqIterator->first.pivot);
			tlsQuanti += "_";
			tlsQuanti += std::to_string(tlsSeqIterator->first.end);
			tlsQuanti += "_";
			for (int i = 0; i < tlsSeqIterator->second.size(); i++) {
				string tlsIndexKey = tlsQuanti + std::to_string(i);
				std::map<string, vector<int>>::iterator invertexIndexIter = TLSInvertedIndex.find(tlsIndexKey);
				if (invertexIndexIter == TLSInvertedIndex.end()) {
					vector<int> tlsIndexValue;
					tlsIndexValue.push_back(queryIterator->graphId);
					TLSInvertedIndex.insert(std::pair<string, vector<int>>(tlsIndexKey, tlsIndexValue));
				}
				else {
					invertexIndexIter->second.push_back(queryIterator->graphId);
				}

				// we use a format i.e. "start_pivot_end_1" to distinuish each tls, thus we can can consider the number of the same tls of each query graph
				newTransaction.push_back(tlsIndexKey);
			}
		}
	}

	const unsigned minimum_support_treshold = 2;

	const FPTree fptree{ transactions, minimum_support_treshold };

	std::set<Pattern> patterns = fptree_growth(fptree);


	cout << "Frequent TLS Size: " << patterns.size() << endl;
	for (std::set<Pattern>::iterator patternIterator = patterns.begin(); patternIterator != patterns.end(); patternIterator++) {
		cout << patternIterator->second << " ";
		for (std::set<Item>::iterator itemIterator = patternIterator->first.begin(); itemIterator != patternIterator->first.end(); itemIterator++) {
			cout<<(*itemIterator) << " ";
		}
		cout << endl;
	}
}
Esempio n. 2
0
std::set<Pattern> fptree_growth(const FPTree& fptree) {
    if ( fptree.empty() ) { return std::set<Pattern>{}; }
    
    if ( contains_single_path( fptree ) ) {
        // generate all possible combinations of the items in the tree
        
        std::set<Pattern> single_path_patterns;
        
        // for each node in the tree
        assert( fptree.root->children.size() == 1 );
        std::shared_ptr<FPNode> curr_fpnode = fptree.root->children.front();
        while ( curr_fpnode ) {
            const Item& curr_fpnode_item = curr_fpnode->item;
            const unsigned curr_fpnode_frequency = curr_fpnode->frequency;
            
            // add a pattern formed only by the item of the current node
            Pattern new_pattern = { { curr_fpnode_item }, curr_fpnode_frequency };
            single_path_patterns.insert( new_pattern );
            
            // create a new pattern by adding the item of the current node to each pattern generated until now
            for ( const Pattern& pattern : single_path_patterns ) {
                Pattern new_pattern{ pattern };
                new_pattern.first.insert( curr_fpnode_item );
                assert( curr_fpnode_frequency <= pattern.second );
                new_pattern.second = curr_fpnode_frequency;
                
                single_path_patterns.insert( new_pattern );
            }

            // advance to the next node until the end of the tree
            assert( curr_fpnode->children.size() <= 1 );
            if ( curr_fpnode->children.size() == 1 ) { curr_fpnode = curr_fpnode->children.front(); }
            else { curr_fpnode = nullptr; }
        }
        
        return single_path_patterns;
    }
    else {
        // generate conditional fptrees for each different item in the fptree, then join the results

        std::set<Pattern> multi_path_patterns;
        
        // for each item in the fptree
        for ( const auto& pair : fptree.header_table ) {
            const Item& curr_item = pair.first;
            
            // build the conditional fptree relative to the current item
            
            // start by generating the conditional pattern base
            std::vector<TransformedPrefixPath> conditional_pattern_base;
            
            // for each path in the header_table (relative to the current item)
            std::shared_ptr<FPNode> path_starting_fpnode = pair.second;
            while ( path_starting_fpnode ) {
                // construct the transformed prefix path
                
                // each item in th transformed prefix path has the same frequency (the frequency of path_starting_fpnode)
                const unsigned path_starting_fpnode_frequency = path_starting_fpnode->frequency;

                std::shared_ptr<FPNode> curr_path_fpnode = path_starting_fpnode->parent;
                // check if curr_path_fpnode is already the root of the fptree
                if ( curr_path_fpnode->parent ) {
                    // the path has at least one node (excluding the starting node and the root)
                    TransformedPrefixPath transformed_prefix_path{ {}, path_starting_fpnode_frequency };
                    
                    while ( curr_path_fpnode->parent ) {
                        assert( curr_path_fpnode->frequency >= path_starting_fpnode_frequency );
                        transformed_prefix_path.first.push_back( curr_path_fpnode->item );
                        
                        // advance to the next node in the path
                        curr_path_fpnode = curr_path_fpnode->parent;
                    }
                    
                    conditional_pattern_base.push_back( transformed_prefix_path );
                }
                
                // advance to the next path
                path_starting_fpnode = path_starting_fpnode->node_link;
            }
        
            // generate the transactions that represent the conditional pattern base
            std::vector<Transaction> conditional_fptree_transactions;
            for ( const TransformedPrefixPath& transformed_prefix_path : conditional_pattern_base ) {
                const std::vector<Item>& transformed_prefix_path_items = transformed_prefix_path.first;
                const unsigned transformed_prefix_path_items_frequency = transformed_prefix_path.second;
                
                Transaction transaction;
                for ( const Item& item : transformed_prefix_path_items ) { transaction.push_back( item ); }
                
                // add the same transaction transformed_prefix_path_items_frequency times
                for ( int i = 0; i < transformed_prefix_path_items_frequency; ++i ) { conditional_fptree_transactions.push_back( transaction ); }
            }
            
            // build the conditional fptree relative to the current item with the transactions just generated
            const FPTree conditional_fptree( conditional_fptree_transactions, fptree.minimum_support_treshold );
            // call recursively fptree_growth on the conditional fptree (empty fptree: no patterns)
            std::set<Pattern> conditional_patterns = fptree_growth( conditional_fptree );
            
            // construct patterns relative to the current item using both the current item and the conditional patterns
            std::set<Pattern> curr_item_patterns;
            
            // the first pattern is made only by the current item
            // compute the frequency of this pattern by summing the frequency of the nodes which have the same item (follow the node links)
            unsigned curr_item_frequency = 0;
            std::shared_ptr<FPNode> fpnode = pair.second;
            while ( fpnode ) {
                curr_item_frequency += fpnode->frequency;
                fpnode = fpnode->node_link;
            }
            // add the pattern as a result
            Pattern pattern{ { curr_item }, curr_item_frequency };
            curr_item_patterns.insert( pattern );
            
            // the next patterns are generated by adding the current item to each conditional pattern
            for ( const Pattern& pattern : conditional_patterns ) {
                Pattern new_pattern{ pattern };
                new_pattern.first.insert( curr_item );
                assert( curr_item_frequency >= pattern.second );
                new_pattern.second = pattern.second;

                curr_item_patterns.insert( { new_pattern } );
            }
            
            // join the patterns generated by the current item with all the other items of the fptree
            multi_path_patterns.insert( curr_item_patterns.cbegin(), curr_item_patterns.cend() );
        }
        
        return multi_path_patterns;
    }
}