コード例 #1
0
ファイル: kmeans.cpp プロジェクト: JorgePoblete/k-means
void Kmeans::validate_centroids(const MatrixXdRowMajor& data_points)
{
    points_per_centroid = VectorXd::Zero(centroids.rows());
    for (int i=0; i<data_points.rows(); i++)
        points_per_centroid(membership(i))++;
    for (int i=0; i<centroids.rows(); i++)
    {
        if (points_per_centroid(i) == 0)
        {
            int biggest_centroid = find_biggest_centroid();
            int farthest_point = find_farthest_point(data_points,biggest_centroid);
            membership(farthest_point) = i;
            points_per_centroid(biggest_centroid)--;
            points_per_centroid(i)++;
        }
    }
}
コード例 #2
0
 void Function::load(const std::string& formula,
         const Engine* engine) {
     unload();
     this->_formula = formula;
     this->_engine = engine;
     this->_root.reset(parse(formula));
     membership(0.0); //make sure function evaluates without throwing exception.
 }
コード例 #3
0
ファイル: kmeans.cpp プロジェクト: JorgePoblete/k-means
/**
 * Assignment step: stores in `membership`, for every row of `data_points`,
 * the value returned by find_nearest_centroid() for that row.
 *
 * @param data_points data set with one point per row.
 */
void Kmeans::e_step(const MatrixXdRowMajor& data_points)
{
    // The OpenMP parallelisation of this loop is currently disabled:
    //#pragma omp parallel for schedule(static)
    int row = 0;
    while (row < data_points.rows())
    {
        membership(row) = find_nearest_centroid(data_points.row(row));
        ++row;
    }
}
コード例 #4
0
 void LinguisticTerm::samples(std::vector<flScalar>& x, std::vector<flScalar>& y,
         int samples, int out_of_range) const {
     flScalar step_size = (maximum() - minimum()) / samples;
     flScalar step = minimum() - (out_of_range * step_size);
     for (int i = 0 - out_of_range; i < samples + out_of_range + 1; ++i, step
             += step_size) {
         x.push_back(step);
         y.push_back(membership(step));
     }
 }
コード例 #5
0
ファイル: kmeans.cpp プロジェクト: JorgePoblete/k-means
void Kmeans::m_step(const MatrixXdRowMajor& data_points)
{
    validate_centroids(data_points);
    MatrixXdRowMajor tmp = MatrixXdRowMajor::Zero(centroids.rows(), centroids.cols());
    for (int i=0; i<data_points.rows(); i++)
        tmp.row(membership(i)) += data_points.row(i);
    for (int i=0; i<centroids.rows(); i++)
    {
        tmp.row(i) /= points_per_centroid(i);
        double tmp_error = euclid_distance(centroids.row(i), tmp.row(i));
        if (tmp_error != 0.0)
            changed_centroids++;
        if (tmp_error > error)
            error = tmp_error;
        centroids.row(i) = tmp.row(i);
    }
}
コード例 #6
0
ファイル: Cluster.cpp プロジェクト: Goon83/scidb
/**
 * Returns the instance liveness known to the NetworkManager or, when the
 * NetworkManager has none, a freshly built one derived from the current
 * instance membership.
 *
 * The fallback object is created with the membership's view id and 0, and
 * receives one InstanceLivenessEntry(instanceId, 0, false) per instance in
 * the membership. It is asserted to report at least one live instance.
 *
 * @return a non-null liveness pointer.
 */
boost::shared_ptr<const InstanceLiveness> Cluster::getInstanceLiveness()
{
   boost::shared_ptr<const InstanceLiveness> known(NetworkManager::getInstance()->getInstanceLiveness());
   if (known) {
      return known;
   }

   // Nothing available from the network layer: synthesize the liveness from
   // the membership.
   boost::shared_ptr<const InstanceMembership> membership(getInstanceMembership());
   boost::shared_ptr<InstanceLiveness> synthesized(new InstanceLiveness(membership->getViewId(), 0));

   typedef std::set<InstanceID>::const_iterator InstanceIter;
   for (InstanceIter it = membership->getInstances().begin();
        it != membership->getInstances().end(); ++it) {
      InstanceID id(*it);
      InstanceLiveness::InstancePtr entry(new InstanceLivenessEntry(id, 0, false));
      synthesized->insert(entry);
   }

   boost::shared_ptr<const InstanceLiveness> result(synthesized);
   assert(result->getNumLive() > 0);
   return result;
}
コード例 #7
0
ファイル: pvalue.cpp プロジェクト: antonislambrou/GACP
double pvalue(int **hypotheses, int l, int eq, float gamma)
{
	double p=0;
	double others=0;
	double own=0;
	double *strangeness;
	int ctr = 0;
	int rules2;
	double Q[classes][::rules]; //global rules
	
	strangeness = (double *) malloc(sizeof(double) * (trainx+1));
	
	//find a quality value for each rule
	for (int cl = 1;cl<=classes;cl++)
	{	
		for (int r=0;r<hypotheses[cl-1][0];r++)
		{	
			double TP = 0;
			double FP = 0;
			
			for(int x=0;x<=trainx;x++)
			{
				double min = 1000;
				for(int a=0;a<attributes;a++)
				{
					short fuzzyGene[fuzzysets];
					for(int g=0;g<fuzzysets;g++)
					{
						fuzzyGene[g] = hypotheses[cl-1][fuzzysets*a+(r*((attributes) *fuzzysets))+g+1];
					}
					
					double ms = membership(a,train_set[x][a],fuzzyGene);
					if(min < ms)
						min = ms;
				}
				if(hypotheses[cl-1][attributes] == train_set[x][attributes])
				{
					TP += min;
				}
				else
				{
					FP += min;
				}	
			}
			
			Q[cl-1][r] = TP / (TP+FP);		
		}		
	}
		
	
	for(int x=0;x<=trainx ;x++)
	{
		others = 0;
		double mem[] = {0,0};
		double max = -1;
		double min = 1000;
		for(int cl=1;cl<=classes;cl++)
		{
			rules2 = hypotheses[cl-1][0];
			max = -1;
			for(int r=0;r<rules2;r++)
			{
				min = 1000;
				for(int a=0;a<attributes;a++)
				{
					short fuzzyGene[fuzzysets];

					for(int g=0;g<fuzzysets;g++)
					{
						fuzzyGene[g] = hypotheses[cl-1][fuzzysets*a+(r*((attributes) *fuzzysets))+g+1];
					}
					double ms = membership(a,train_set[x][a],fuzzyGene);
					if(ms <  min)
						min = ms;
				}

				min *= Q[cl-1][r];
				if(min > max)
					max = min;
			}

			mem[cl-1] = max;
		}
		
		
		//find sum of others 
    	for(int j=1;j<=classes;j++)
        	if (j != (int) train_set[x][attributes] ){
        	    others = others + mem[j-1];
        	    //printf("%.4f ",mem[j-1]);
        	}
		//printf("others: %.2f\n",others);
    	own = mem[(int)train_set[x][attributes]-1];
    	//printf("own: %d %.2f\n",(int)train_set[x][attributes],own);
    	if(eq == 13)
    		strangeness[x] = (double) others / ((double)own+(double)gamma);
    	else if(eq == 14)
    		strangeness[x] = others - own*gamma;
    	//else
    	//	strangeness[x] = 1 - own;

	}

	int another_ctr = 0;
	for(int i=0;i<=trainx;i++){
    		another_ctr++;
    		if(strangeness[i] >= strangeness[trainx]){
    		    ctr = ctr+1;
    		}
    }
	p = (double) ctr / (double) (another_ctr);

	return p;
}
コード例 #8
0
ファイル: algos.hpp プロジェクト: cjain7/mxx
// Reorders `elements` in place so that elements are grouped by bucket, and
// returns the number of elements that fall into each bucket.
//
// NOTE(review): `T` and `Func` are template parameters; the template header
// is not part of this excerpt.
//
// elements    : values to reorder; modified in place.
// key_func    : called once per element; its result is used directly as an
//               index into a vector of size num_buckets, so it has to lie in
//               [0, num_buckets) (this is not checked here).
// num_buckets : number of buckets.
// returns     : send_counts, where send_counts[b] is the number of elements
//               whose key is b.
//
// Within a bucket the original relative order is presumably not preserved,
// since elements are moved by following permutation cycles - verify before
// relying on stability.
std::vector<size_t> bucketing(std::vector<T>& elements, Func key_func, size_t num_buckets) {

    // number of elements per bucket
    std::vector<size_t> send_counts(num_buckets, 0);

    // if no elements, return 0 count for each bucket
    if (elements.size() == 0)
        return send_counts;

    // for each element, track which bucket it belongs into
    std::vector<long> membership(elements.size());
    for (size_t i = 0; i < elements.size(); ++i)
    {
        membership[i] = key_func(elements[i]);
        ++(send_counts[membership[i]]);
    }
    // at this point, have target assignment for each data element, and also count for each process bucket.

    // compute the offsets within the buffer:
    // offset[b] is the next free slot of bucket b (assuming excl_prefix_sum
    // turns the counts into exclusive prefix sums), maxes[b] is one past the
    // last slot of bucket b.
    std::vector<size_t> offset = send_counts;
    excl_prefix_sum(offset.begin(), offset.end());
    std::vector<size_t> maxes = offset;

    for (size_t i = 0; i < num_buckets; ++i) {
        maxes[i] += send_counts[i];
    }


    //== swap elements around.
    T val;
    size_t tar_pos, start_pos;

    long target;

    // while loop will stop under 2 conditions:
    //      1. returned to starting position (looped), or
    //      2. tar_pos is the current pos.
    // either way, we need a new starting point.  instead of searching through buffer O(N), search
    // for incomplete buckets via offset O(p).

    // Note that `i` only advances once bucket i is complete; each pass of the
    // loop body moves one chain of elements.
    for (size_t i = 0; i < num_buckets;) {
        // determine the starting position.
        if (offset[i] == maxes[i]) {
            ++i;  // skip all completed buckets
            continue;  // have the loop check value.
        }
        // get the start pos.
        start_pos = offset[i];

        // set up the variable with the current entry.
        target = membership[start_pos];
        if (target > -1) {
            val = ::std::move(elements[start_pos]);  // value to move
            membership[start_pos] = -2;                // special value to indicate where we started from.

            while (target > -1) {  // if -1 or -2, then either visited or beginning of chain.
                tar_pos = offset[target]++;  // claim the next free slot of the target bucket; slots before it already hold their final values.
                target = membership[tar_pos];  // remember where the element being displaced has to go.

                // save the info at tar_pos;
                ::std::swap(val, elements[tar_pos]);  // put what's in src into buffer at tar_pos, and save what's at buffer[tar_pos]
                membership[tar_pos] = -1;               // mark as visited.

            }  // else already visited, so done.
        }
    }

    return send_counts;
}
コード例 #9
0
/**
 * Matches a point cloud against the document database, once as a whole and,
 * when clustering is enabled, once more per keypoint cluster.
 *
 * Nothing is written to the output vectors when no feature is found.
 *
 * Changes with respect to the previous version:
 *  - the point array obtained from annAllocPts() is now released with
 *    annDeallocPts() in every case (it was released with delete[], which
 *    only frees the pointer array, and only when clustering was enabled);
 *  - the KeypointExt helper objects are deleted before returning (they were
 *    leaked).
 *
 * @param point_cloud_in      cloud to match.
 * @param match_names         cleared, then filled with (score, document name)
 *                            for every match of the whole cloud.
 * @param cluster_match_names only touched when clustering is enabled: cleared,
 *                            then filled with (score, document name) for the
 *                            merged whole-cloud and per-cluster matches,
 *                            sorted in ascending order.
 * @param num_match           number of matches requested from the database
 *                            for each query.
 */
void 
vtree_user::match_list( pcl::PointCloud<PointT>::Ptr & point_cloud_in,
						std::vector<std::pair<float,std::string> > & match_names,
						std::vector<std::pair<float,std::string> > & cluster_match_names,
						int num_match )
{
	// Extract keypoint in the pointcloud
	ROS_INFO("Extracting keypoints and computing features! We have a cloud with %d points", static_cast<int>(point_cloud_in->size()) );
	
	pcl::PointCloud<PointT>::Ptr keypoint_cloud ( new pcl::PointCloud<PointT> );
	pcl::PointCloud<FeatureType>::Ptr feature_cloud( new pcl::PointCloud<FeatureType> );
	compute_features( point_cloud_in, keypoint_cloud, feature_cloud );
	
	int num_feat = feature_cloud->size();
	ROS_INFO("Done. %d features found", num_feat);
	
	if( num_feat == 0)
	{
		ROS_INFO("The feature cloud is empty");
		return;
	}
	
	// Rectify the histogram values to ensure they are in [0,100] and create a document
	vt::Document full_doc;		
	for( pcl::PointCloud<FeatureType>::iterator iter = feature_cloud->begin();
	iter != feature_cloud->end(); ++iter)
	{
		rectify_histogram( *iter );
		full_doc.push_back(tree.quantize( FeatureHist( iter->histogram ) ));	
	}
	
	
	// Collect the keypoint coordinates for clustering and wrap every feature
	// together with its quantized word.
	ANNpointArray ann_points;
	ann_points = annAllocPts(num_feat, 3);
	std::vector<KeypointExt*> extended_keypoints;
	extended_keypoints.reserve(num_feat);
		
	for( int i=0; i < num_feat; ++i )
	{
		if (enable_clustering)
	  	{
	  		ann_points[i][0] = keypoint_cloud->points[i].x;
	  		ann_points[i][1] = keypoint_cloud->points[i].y;
	  		ann_points[i][2] = keypoint_cloud->points[i].z;
	  	}
		extended_keypoints.push_back( new KeypointExt( feature_cloud->at(i), full_doc[i] ) );
	}
	
	
	int cluster_count = 0;
	std::vector<int> cluster_sizes;
	if (enable_clustering)
	{
		std::vector<int> membership(num_feat);
		cluster_count = pcd_utils::cluster_points( ann_points, num_feat, membership,
												  radius_adaptation_r_max, radius_adaptation_r_min, 
												  radius_adaptation_A, radius_adaptation_K );
																		
		cluster_sizes.resize(cluster_count, 0);
		// NOTE(review): assumes every membership[i] lies in [0, cluster_count);
		// confirm against pcd_utils::cluster_points.
		for (int i = 0; i < num_feat; ++i)
		{
			extended_keypoints[i]->cluster = membership[i];
			++cluster_sizes[membership[i]];
		}
	}
	// The coordinates are not needed past this point. annDeallocPts() is the
	// counterpart of annAllocPts() and releases both the pointer array and
	// the coordinate storage.
	annDeallocPts(ann_points);

	if(DEBUG)
		ROS_INFO_STREAM("Clusters found = " << cluster_count);
	//*******************************************************************

	// Obtain the matches from the database	
	vt::Matches matches;
	db->find(full_doc, num_match, matches);	// std::string documents_map[matches[i].id]->name; float matches[i].score, 
	
	match_names.clear();
	for ( vt::Matches::iterator it = matches.begin(); it != matches.end(); ++it)
		match_names.push_back( std::make_pair( it->score, documents_map[it->id]->name ));
		
			
	if (enable_clustering)
	{
		// store in matches_map
		std::map<uint32_t, float> matches_map;
		for ( vt::Matches::iterator it = matches.begin(); it != matches.end(); ++it)
		{
			matches_map[it->id] = it->score;
		}
	
		// Query the database once per cluster that is large enough
		for (int c = 0; c < cluster_count; ++c) 
		{
			vt::Document cluster_doc;
			vt::Matches cluster_matches;
		
			for (int i = 0; i < num_feat; ++i)
				if ( extended_keypoints[i]->cluster == static_cast<unsigned int>(c) )
					cluster_doc.push_back(full_doc[i]);
		
			if (cluster_doc.size() < static_cast<unsigned int>(min_cluster_size))
				continue;
		
			db->find(cluster_doc, num_match, cluster_matches);
		
			if(DEBUG)
				ROS_INFO_STREAM("Cluster " << c <<  "(size = " << cluster_doc.size() << "):");

		
			// NOTE(review): a cluster match overwrites whatever score is
			// already stored for the same document id; scores are not
			// accumulated.
			//update_matches_map(cluster_matches, cluster_doc.size());
			for ( vt::Matches::iterator it = cluster_matches.begin(); it != cluster_matches.end(); ++it)
			{
				matches_map[it->id] = it->score;
			}		
		}
		
		// Get the updated match names
		cluster_match_names.clear();
		for( std::map<uint32_t, float>::iterator iter = matches_map.begin();
		 	 iter != matches_map.end(); ++iter )
		{
			cluster_match_names.push_back( std::make_pair( iter->second, documents_map[iter->first]->name) );
		}
		
		// sort
		std::sort( cluster_match_names.begin(), cluster_match_names.end() );
	}

	// The keypoint wrappers never leave this function: release them.
	for (size_t i = 0; i < extended_keypoints.size(); ++i)
		delete extended_keypoints[i];
}
コード例 #10
0
ファイル: Clustering.cpp プロジェクト: wangbiaouestc/clpeak
/**
 * Clusters the values in `m_data` into `k` clusters.
 *
 * Cluster positions start at randomly picked data values; the random
 * generator is seeded from the first data value, so a given data set always
 * produces the same sequence of picks. Each round assigns every value to the
 * cluster at minimum _distance() and then lets every cluster recompute its
 * position. The rounds stop once at most 0.1% of the values changed cluster
 * in a round, or after 5000 rounds.
 *
 * Changes with respect to the previous version:
 *  - an empty data set returns an empty result (it used to dereference
 *    begin() of the empty container and compute `rand() % 0`);
 *  - the per-value cluster index is stored as `unsigned` rather than `short`,
 *    so it no longer truncates for large `k`; `k` itself, which is never a
 *    valid index, marks "not assigned yet";
 *  - an unused local vector was removed.
 *
 * @param k number of clusters.
 * @return the scored clusters, or an empty result when `k` is 0 or there is
 *         no data.
 */
util::Clustering::ClusterResult util::Clustering::_kmeans(unsigned k) {
	if (k == 0 || m_data.size() == 0) {
		return ClusterResult();
	}
	
	const double threshold = 0.001;
	const unsigned maxIterations = 5000;
	srand((int)(*m_data.begin()));
	
	double delta = 0.0;
	unsigned iteration = 0;
	
	// Cluster index of every value; `k` means "not assigned yet".
	std::vector<unsigned> membership(m_data.size(), k);
	
	ClusterVector clusters;
	clusters.resize(k);
	for (unsigned i = 0; i < k; i++) {
		clusters[i].position = m_data[rand() % m_data.size()];
	}
	
	// clustering main loop
	do {
		delta = 0.0;
		for (unsigned i = 0; i < k; i++) {
			clusters[i].data.clear();
		}
		
		for (unsigned i = 0; i < m_data.size(); i++) {
			float value = m_data[i];
			
			// compute which cluster lies at minimum distance to data point
			float dist = 0.0, minDist = 1e20;
			unsigned minIdx = 0;
			for (unsigned j = 0; j < k; j++) {
				if ((dist = _distance(value, clusters[j].position)) < minDist) {
					minDist = dist;
					minIdx = j;
				}
			}
			clusters[minIdx].data.push_back(value);
			
			// check if membership changed
			if (membership[i] != minIdx) {
				delta += 1.0;
				membership[i] = minIdx;
			}
		}
		
		for (unsigned i = 0; i < k; i++) {
			clusters[i].updatePosition();
		}
		
		// fraction of values that changed cluster during this round
		delta /= m_data.size();
		iteration++;
	} while (delta > threshold && iteration < maxIterations);
	
	// compute score for the clustering
	for (unsigned i = 0; i < k; i++) {
		clusters[i].updateScore();
	}
	
	ClusterResult result;
	for (auto& cluster : clusters) {
		result.add(cluster);
	}
	return result;
}
コード例 #11
0
bool MembershipBuilder::readFile() {
	string data;
	ifstream datafile;
	datafile.open(fileName, ifstream::in);

	if(!datafile)
	{
		cout << fileName
			 << " opened failed!!"
			 << endl;

		exit(-1);
	}

	getline(datafile, data, '\0'); // read the whole text file as string

	string delimiter = "\n";
	char delim = ',';
	size_t pos = 0;
	size_t pos2 = 0;
	string line;

	string id, rank;
	double fee;
	vector<string> linesplit;

	while ((pos = data.find(delimiter)) != string::npos) // split the data string by \n delimiter
	{
		line = data.substr(0, pos);
		split(line, delim, linesplit);
		data.erase(0, pos + delimiter.length());
	}
	int n = 0;
	for(int i = 0; i < linesplit.size(); i++)
	{
		vector<string> accessRight;

		if(n == 4)
		{
			n = 0;
		}
	   switch(n)
	   {
		   case 0: id = linesplit[i]; break;
		   case 1: rank = linesplit[i]; break;
		   case 2: fee = atof(linesplit[i].c_str()); break;
		   case 3:
			   while ((pos = linesplit[i].find(delimiter)) != string::npos) // split the data string by \n delimiter
			   	{
			   		line = linesplit[i].substr(0, pos2);
			   		split(line, ':', accessRight);
			   		linesplit[i].erase(0, pos2 + delimiter.length());
			   	}

			   	   break;
	   }
	   if(n == 3)
	   {
		   Membership membership(id, rank, fee, accessRight);
		   vectorOfMembership.push_back(membership);

	   }
	   n++;
	}
	datafile.close();
	return true;
}
コード例 #12
0
ファイル: kmeans.cpp プロジェクト: JorgePoblete/k-means
// Assignment step split between offload devices (the `mic:i` targets of the
// offload pragmas) and the host, using two concurrent OpenMP sections:
//  - the "mic" section sends the centroids to every device, then, for every
//    even streaming iteration, has each device compute the nearest centroid
//    of a chunk of points and starts the transfer of the results back into
//    the host membership buffer;
//  - the "host" section computes the membership of the rows from
//    host_initial_data_point to the end with find_nearest_centroid().
//
// NOTE(review): as visible here the function looks unfinished - the branch
// for odd streaming iterations is empty, the "_b" signals as well as `start`
// and `size` are never used, and nothing in this function waits on
// mic_transfer_out_signal_a.
void Kmeans::mic_e_step(const MatrixXdRowMajor& data_points)
{
    int k = centroids.rows();       // number of centroids
    int d = data_points.cols();     // number of values per data point

    double *host_centroids = centroids.data();
    // write cursor into the host membership buffer; advanced after each
    // device transfer below
    double *host_membership = membership.data();
	int data_points_count = mic_stream_data_points_count;
    int start = 0;
    int size = mic_data_points_count * data_points.cols();
    // the addresses of these pointers serve as tags for the asynchronous
    // offload signal()/wait() clauses
	double *mic_process_signal_a;
    double *mic_process_signal_b;
    double *mic_transfer_out_signal_a;
    double *mic_transfer_out_signal_b;

    #pragma omp parallel sections
    {
        //mic section
        #pragma omp section
        {
            //send the centroids to each mic
            for (int i=0; i<mic_number_devices; i++)
            {
                #pragma offload_transfer target(mic:i) in(host_centroids[0:k*d]: REUSE into(mic_centroids[0:k*d]))
            }

            //main process loop
            for (int n=0; n<mic_number_streamings; n++)
            {
                if (n % 2 == 0)
                {
                    //process a chunk of data_points
                    for (int i=0; i<mic_number_devices; i++)
                    {
                        // length(0): no data is copied for these clauses
                        #pragma offload target(mic:i) signal(mic_process_signal_a)\
                        in(mic_data_points:length(0) REUSE)\
                        in(mic_centroids:length(0) REUSE)\
                        in(mic_stream_membership_a:length(0) REUSE)
                        {
                            #pragma omp parallel for schedule(static)
                            for (int j=0; j<data_points_count; j++)
                	        {
                	            mic_stream_membership_a[j] = mic_find_nearest_centroid(j * d + mic_stream_a,mic_centroids, k, d);
                	        }
                        }
                    }
                    //asynchronously transfer the results from each mic to the host
                    for (int i=0; i<mic_number_devices; i++)
                    {
                        #pragma offload_transfer target(mic:i) wait(mic_process_signal_a) signal(mic_transfer_out_signal_a)\
                        out(mic_stream_membership_a:length(data_points_count) REUSE into(host_membership[0:data_points_count]))
                        // the next device's results go right after this one's
                        host_membership += data_points_count;
                    }
                } else
                {
                    // NOTE(review): odd iterations do nothing at the moment

                }
            }
        }
        //host section
        #pragma omp section
        {
            #pragma omp parallel for schedule(static)
            for (int i=host_initial_data_point; i<data_points.rows(); i++)
            {
                membership(i) = find_nearest_centroid(data_points.row(i));
            }
        }
    }
}