Beispiel #1
0
/**
 * Build the joint distribution for a whole clustering.
 *
 * For every cluster in `clustering`, the result of
 * createClusterJointDist( variables, cluster ) is multiplied (operator*=)
 * into a single computable-object list, in iteration order. That list,
 * together with `variables`, is then used to construct the returned
 * plJointDistribution.
 *
 * @param variables   variables handed to every per-cluster factor and to the
 *                    final joint distribution.
 * @param clustering  clusters to iterate over; an empty clustering yields a
 *                    joint distribution built from an empty list.
 * @return the joint distribution constructed from the accumulated list.
 */
plJointDistribution createClusteringJointDist( const Variables& variables, const Clustering& clustering) {
  plComputableObjectList factors;
  Clustering::const_iterator cluster = clustering.begin();
  while ( cluster != clustering.end() ) {
    factors *= createClusterJointDist( variables, *cluster );
    ++cluster;
  }
  return plJointDistribution( variables, factors );
}
/**
 * Seed the dynamic communities from a step clustering.
 *
 * Clears m_dynamic, then creates one new DynamicCluster for every step
 * cluster whose size is at least MIN_CLUSTER_SIZE. Smaller clusters are
 * skipped, but they still consume a step-cluster index, so the index passed
 * to DynamicCluster::update is the cluster's position in `step_clustering`.
 *
 * @param step_clustering  clusters of the current step (m_step).
 * @return always true.
 */
bool MatchingDynamicClusterer::bootstrap( Clustering &step_clustering )
{
	m_dynamic.clear();

	int cluster_pos = 0;
	const Clustering::iterator last = step_clustering.end();
	for( Clustering::iterator current = step_clustering.begin(); current != last; ++current, ++cluster_pos )
	{
		// Only clusters that reach the minimum size give birth to a community.
		if( current->size() >= MIN_CLUSTER_SIZE )
		{
			DynamicCluster born;
			born.update( m_step, cluster_pos, *current );
			m_dynamic.push_back(born);
#ifdef DEBUG_MATCHING
			cout << "T" << m_step << ": Birth: Community M" << m_dynamic.size() << endl;
#endif
		}
	}
	return true;
}
/**
 * Add the clustering of the next time step and match it against the existing
 * dynamic communities, using a node -> dynamic-community lookup table.
 *
 * Steps:
 *  1. Increment m_step; on the very first step delegate to bootstrap().
 *  2. Index the nodes of each live dynamic community's front cluster
 *     (communities for which is_dead() is true, when m_death_age > 0, are
 *     recorded with size 0 and never matched).
 *  3. For every step cluster of at least MIN_CLUSTER_SIZE nodes, count the
 *     nodes shared with each dynamic community and compute a similarity:
 *       - with SIM_OVERLAP:  shared / min(|step|, |front|)
 *       - otherwise:         shared / (|step| + |front| - shared)
 *     Every community whose similarity exceeds m_threshold is a match.
 *     A step cluster without any match starts a new dynamic community.
 *  4. Apply the matches: the first step cluster matched to a dynamic
 *     community continues it via update(); each further step cluster matched
 *     to the same community creates a new community built from the existing
 *     one (a split).
 *  5. Append all newly created communities to m_dynamic.
 *
 * The per-community size and intersection buffers are vectors rather than
 * new[]/delete[] arrays so that they are released even if a container
 * operation or DynamicCluster call throws part-way through.
 *
 * @param step_clustering  clusters of the new time step.
 * @return always true.
 */
bool MapMatchingDynamicClusterer::add_clustering( Clustering &step_clustering )
{
	m_step += 1;
	/// First?
	if( m_step == 1 )
	{
		return bootstrap(step_clustering);
	}

	int dyn_count = (int)m_dynamic.size();
	int dyn_index = 0;

	/// Build a map of Nodes -> Dynamic Communities containing those nodes
	map<NODE,set<int> > fastmap;
	// Size of each community's front cluster; 0 marks a dead community.
	vector<long> dyn_sizes( m_dynamic.size(), 0L );
	DynamicClustering::iterator dit;
	DynamicClustering::iterator dend = m_dynamic.end();
	for( dit = m_dynamic.begin() ; dit != dend; ++dit, ++dyn_index )
	{
		// Dead? Leave its size at 0 so it can never be matched below.
		if( m_death_age > 0 && (*dit).is_dead( m_step, m_death_age ) )
		{
			continue;
		}
		Cluster& front = (*dit).front();
		dyn_sizes[dyn_index] = (long)front.size();
		Cluster::const_iterator fend = front.end();
		for( Cluster::const_iterator fit = front.begin() ; fit != fend; ++fit )
		{
			NODE node_index = *fit;
			// operator[] creates the empty set on first sight of a node.
			fastmap[node_index].insert(dyn_index);
		}
	}

	/// Now try to match all
	vector<int> all_intersection( m_dynamic.size(), 0 );
	vector<DynamicCluster> fresh;
	PairVector matched_pairs;
	map<NODE,set<int> >::const_iterator mend = fastmap.end();
	int step_cluster_index = 0;
	Clustering::iterator cend = step_clustering.end();
	for( Clustering::iterator cit = step_clustering.begin() ; cit != cend; ++cit, ++step_cluster_index )
	{
		long size_step = (long)(*cit).size();
		if( size_step < MIN_CLUSTER_SIZE )
		{
			continue;
		}

		// Compute all intersections: reset the counters, then bump the counter
		// of every dynamic community that contains a node of this step cluster.
		all_intersection.assign( all_intersection.size(), 0 );
		Cluster::const_iterator xend = (*cit).end();
		for( Cluster::const_iterator xit = (*cit).begin() ; xit != xend; ++xit )
		{
			NODE node_index = *xit;
			map<NODE,set<int> >::const_iterator mit = fastmap.find(node_index);
			if( mit == mend )
			{
				continue;
			}
			// Reuse the iterator from find() instead of looking the node up again.
			const set<int>& owners = mit->second;
			for( set<int>::const_iterator sit = owners.begin(); sit != owners.end(); ++sit )
			{
				all_intersection[(*sit)]++;
			}
		}

		// Find matches
		vector<int> matches;
		for( dyn_index = 0; dyn_index < dyn_count; dyn_index++)
		{
			if( dyn_sizes[dyn_index] == 0 || all_intersection[dyn_index] == 0 )
			{
				continue;
			}
#ifdef SIM_OVERLAP
			double sim = ((double)(all_intersection[dyn_index]))/min(size_step,dyn_sizes[dyn_index]);
#else
			double sim = ((double)(all_intersection[dyn_index]))/(size_step+dyn_sizes[dyn_index]-all_intersection[dyn_index]);
#endif
			if( sim > m_threshold )
			{
				matches.push_back( dyn_index );
			}
		}

		// new community?
		if( matches.empty() )
		{
			DynamicCluster dc;
			dc.update( m_step, step_cluster_index, *cit );
			fresh.push_back(dc);
#ifdef DEBUG_MATCHING
			cout << "T" << m_step << ": Birth: Community M" << (m_dynamic.size()+fresh.size()) << " from C" << step_cluster_index+1 << endl;
#endif
		}
		else
		{
			// Defer the updates: m_dynamic must stay untouched while matching.
			for( vector<int>::const_iterator iit = matches.begin() ; iit != matches.end(); ++iit )
			{
				pair<int,int> p(step_cluster_index,(*iit));
				matched_pairs.push_back(p);
			}
		}
	}

	// Actually update existing dynamic communities now
	set<int> matched_dynamic;
	for( PairVector::const_iterator pit = matched_pairs.begin(); pit != matched_pairs.end(); ++pit )
	{
		int matched_step_index = (*pit).first;
		int dyn_cluster_index = (*pit).second;
		// already processed this dynamic cluster?
		if( matched_dynamic.count( dyn_cluster_index ) )
		{
			DynamicCluster dc( m_dynamic[dyn_cluster_index], m_step, matched_step_index, step_clustering[matched_step_index] );
			fresh.push_back(dc);
#ifdef DEBUG_MATCHING
			cout << "T" << m_step << ": Split: Matched C" << (matched_step_index+1) << " to M" << (dyn_cluster_index+1) << ". Splitting to M" << (m_dynamic.size()+fresh.size()) <<  endl;
#endif
		}
		else
		{
#ifdef DEBUG_MATCHING
			cout << "T" << m_step << ": Continuation: Matched C" << (matched_step_index+1) << " to M" << (dyn_cluster_index+1) << endl;
#endif
			m_dynamic[dyn_cluster_index].update( m_step, matched_step_index, step_clustering[matched_step_index] );
			matched_dynamic.insert(dyn_cluster_index);
		}
	}

	// And finally add any new dynamic communities
	for( vector<DynamicCluster>::const_iterator nit = fresh.begin() ; nit != fresh.end(); ++nit )
	{
		m_dynamic.push_back(*nit);
	}

	return true;
}
/**
 * Add the clustering of the next time step and match it against the existing
 * dynamic communities.
 *
 * m_step is incremented first; on step 1 the work is delegated to
 * bootstrap(). Otherwise the function runs in three phases:
 *  1. For each step cluster, find_matches() fills a list of indices into
 *     m_dynamic. A cluster with no match starts a new dynamic community;
 *     otherwise every (step cluster, dynamic community) pair is recorded.
 *  2. The recorded pairs are applied in order: the first pair seen for a
 *     dynamic community continues it via update(); every later pair for the
 *     same community creates a new community constructed from the existing
 *     one (a split).
 *  3. All newly created communities are appended to m_dynamic.
 *
 * @param step_clustering  clusters of the new time step.
 * @return always true.
 */
bool MatchingDynamicClusterer::add_clustering( Clustering &step_clustering )
{
	m_step += 1;
	// The very first step only seeds the dynamic communities.
	if( m_step == 1 )
	{
		return bootstrap(step_clustering);
	}

	// Phase 1: collect births and (step cluster, dynamic community) matches.
	vector<DynamicCluster> newborn;
	PairVector pending;
	int cluster_pos = 0;
	const Clustering::iterator last = step_clustering.end();
	for( Clustering::iterator current = step_clustering.begin(); current != last; ++current, ++cluster_pos )
	{
		vector<int> hits;
		find_matches( *current, hits );

		if( hits.empty() )
		{
			// Nothing matched: this step cluster starts a new community.
			DynamicCluster born;
			born.update( m_step, cluster_pos, *current );
			newborn.push_back(born);
#ifdef DEBUG_MATCHING
			cout << "T" << m_step << ": Birth: Community M" << (m_dynamic.size()+newborn.size()) << " from C" << cluster_pos+1 << endl;
#endif
			continue;
		}

		for( vector<int>::const_iterator hit = hits.begin(); hit != hits.end(); ++hit )
		{
			pending.push_back( pair<int,int>( cluster_pos, *hit ) );
		}
	}

	// Phase 2: apply the matches to the existing dynamic communities.
	set<int> continued;
	for( PairVector::const_iterator entry = pending.begin(); entry != pending.end(); ++entry )
	{
		int step_idx = entry->first;
		int dyn_idx = entry->second;

		if( continued.find( dyn_idx ) == continued.end() )
		{
			// First match for this dynamic community: it simply continues.
#ifdef DEBUG_MATCHING
			cout << "T" << m_step << ": Continuation: Matched C" << (step_idx+1) << " to M" << (dyn_idx+1) << endl;
#endif
			m_dynamic[dyn_idx].update( m_step, step_idx, step_clustering[step_idx] );
			continued.insert(dyn_idx);
		}
		else
		{
			// Community already continued this step: branch off a new one.
			DynamicCluster branch( m_dynamic[dyn_idx], m_step, step_idx, step_clustering[step_idx] );
			newborn.push_back(branch);
#ifdef DEBUG_MATCHING
			cout << "T" << m_step << ": Split: Matched C" << (step_idx+1) << " to M" << (dyn_idx+1) << ". Splitting to M" << (m_dynamic.size()+newborn.size()) <<  endl;
#endif
		}
	}

	// Phase 3: append the communities created during this step.
	for( vector<DynamicCluster>::const_iterator added = newborn.begin(); added != newborn.end(); ++added )
	{
		m_dynamic.push_back(*added);
	}

	return true;
}