void Kmeans::validate_centroids(const MatrixXdRowMajor& data_points) {
    points_per_centroid = VectorXd::Zero(centroids.rows());
    for (int i = 0; i < data_points.rows(); i++)
        points_per_centroid(membership(i))++;
    // Repair empty clusters: steal the farthest point of the most populated
    // cluster so that every centroid keeps at least one member.
    for (int i = 0; i < centroids.rows(); i++) {
        if (points_per_centroid(i) == 0) {
            int biggest_centroid = find_biggest_centroid();
            int farthest_point = find_farthest_point(data_points, biggest_centroid);
            membership(farthest_point) = i;
            points_per_centroid(biggest_centroid)--;
            points_per_centroid(i)++;
        }
    }
}
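// The two helpers used above are not shown in this snippet. A minimal sketch of
// what they plausibly look like, assuming the same members (points_per_centroid,
// membership, centroids) and the euclid_distance() used in m_step; this is an
// illustration, not the project's actual implementation:
int Kmeans::find_biggest_centroid() {
    int biggest = 0;
    for (int i = 1; i < points_per_centroid.size(); i++)
        if (points_per_centroid(i) > points_per_centroid(biggest))
            biggest = i;
    return biggest;
}

int Kmeans::find_farthest_point(const MatrixXdRowMajor& data_points, int centroid) {
    int farthest = -1;
    double max_dist = -1.0;
    for (int i = 0; i < data_points.rows(); i++) {
        if ((int) membership(i) != centroid) continue; // only points of that cluster
        double dist = euclid_distance(centroids.row(centroid), data_points.row(i));
        if (dist > max_dist) { max_dist = dist; farthest = i; }
    }
    return farthest;
}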
void Function::load(const std::string& formula, const Engine* engine) {
    unload();
    this->_formula = formula;
    this->_engine = engine;
    this->_root.reset(parse(formula));
    membership(0.0); // make sure the function evaluates without throwing an exception
}
void Kmeans::e_step(const MatrixXdRowMajor& data_points) {
    //#pragma omp parallel for schedule(static)
    for (int i = 0; i < data_points.rows(); i++) {
        membership(i) = find_nearest_centroid(data_points.row(i));
    }
}
void LinguisticTerm::samples(std::vector<flScalar>& x, std::vector<flScalar>& y,
                             int samples, int out_of_range) const {
    flScalar step_size = (maximum() - minimum()) / samples;
    flScalar step = minimum() - (out_of_range * step_size);
    for (int i = 0 - out_of_range; i < samples + out_of_range + 1; ++i, step += step_size) {
        x.push_back(step);
        y.push_back(membership(step));
    }
}
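// Usage sketch for the sampler above: collect (x, y) pairs that cover the
// term's range plus five out-of-range steps on each side, e.g. for plotting.
// `term` stands in for any concrete LinguisticTerm implementation.
void sample_term_for_plot(const LinguisticTerm& term,
                          std::vector<flScalar>& x, std::vector<flScalar>& y) {
    x.clear();
    y.clear();
    term.samples(x, y, /*samples=*/100, /*out_of_range=*/5);
    // x now holds 111 evenly spaced abscissae (samples + 2*out_of_range + 1),
    // y the corresponding membership degrees.
}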
void Kmeans::m_step(const MatrixXdRowMajor& data_points) {
    validate_centroids(data_points); // guarantees points_per_centroid(i) > 0 below
    MatrixXdRowMajor tmp = MatrixXdRowMajor::Zero(centroids.rows(), centroids.cols());
    for (int i = 0; i < data_points.rows(); i++)
        tmp.row(membership(i)) += data_points.row(i);
    for (int i = 0; i < centroids.rows(); i++) {
        tmp.row(i) /= points_per_centroid(i);
        double tmp_error = euclid_distance(centroids.row(i), tmp.row(i));
        if (tmp_error != 0.0) changed_centroids++;
        if (tmp_error > error) error = tmp_error;
        centroids.row(i) = tmp.row(i);
    }
}
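// A minimal driver sketch showing how e_step/m_step/validate_centroids fit
// together; fit, max_iterations and tolerance are illustrative names, not
// members shown anywhere in this snippet.
void Kmeans::fit(const MatrixXdRowMajor& data_points, int max_iterations, double tolerance) {
    for (int iter = 0; iter < max_iterations; iter++) {
        error = 0.0;
        changed_centroids = 0;
        e_step(data_points); // assign each point to its nearest centroid
        m_step(data_points); // move each centroid to the mean of its points
        if (changed_centroids == 0 || error < tolerance)
            break;           // converged: no centroid moved meaningfully
    }
}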
boost::shared_ptr<const InstanceLiveness> Cluster::getInstanceLiveness() {
    boost::shared_ptr<const InstanceLiveness> liveness(NetworkManager::getInstance()->getInstanceLiveness());
    if (liveness) {
        return liveness;
    }
    // No liveness information yet: synthesize one from the current membership,
    // with an entry for every known instance.
    boost::shared_ptr<const InstanceMembership> membership(getInstanceMembership());
    boost::shared_ptr<InstanceLiveness> newLiveness(new InstanceLiveness(membership->getViewId(), 0));
    for (std::set<InstanceID>::const_iterator i = membership->getInstances().begin();
         i != membership->getInstances().end(); ++i) {
        InstanceID instanceId(*i);
        InstanceLiveness::InstancePtr entry(new InstanceLivenessEntry(instanceId, 0, false));
        newLiveness->insert(entry);
    }
    liveness = newLiveness;
    assert(liveness->getNumLive() > 0);
    return liveness;
}
double pvalue(int **hypotheses, int l, int eq, float gamma) {
    double p = 0;
    double others = 0;
    double own = 0;
    double *strangeness;
    int ctr = 0;
    int rules2;
    double Q[classes][::rules]; // ::rules is the global rule count
    strangeness = (double *) malloc(sizeof(double) * (trainx + 1));

    // Find a quality value Q[class][rule] = TP / (TP + FP) for each rule.
    for (int cl = 1; cl <= classes; cl++) {
        for (int r = 0; r < hypotheses[cl-1][0]; r++) {
            double TP = 0;
            double FP = 0;
            for (int x = 0; x <= trainx; x++) {
                double min = 1000;
                for (int a = 0; a < attributes; a++) {
                    short fuzzyGene[fuzzysets];
                    for (int g = 0; g < fuzzysets; g++) {
                        fuzzyGene[g] = hypotheses[cl-1][fuzzysets*a + r*attributes*fuzzysets + g + 1];
                    }
                    double ms = membership(a, train_set[x][a], fuzzyGene);
                    if (ms < min) min = ms; // rule firing strength = min over attributes
                }
                if (hypotheses[cl-1][attributes] == train_set[x][attributes]) {
                    TP += min;
                } else {
                    FP += min;
                }
            }
            Q[cl-1][r] = TP / (TP + FP);
        }
    }

    // Compute a strangeness score for each training example.
    for (int x = 0; x <= trainx; x++) {
        others = 0;
        double mem[] = {0, 0}; // per-class membership (the code assumes two classes)
        double max = -1;
        double min = 1000;
        for (int cl = 1; cl <= classes; cl++) {
            rules2 = hypotheses[cl-1][0];
            max = -1;
            for (int r = 0; r < rules2; r++) {
                min = 1000;
                for (int a = 0; a < attributes; a++) {
                    short fuzzyGene[fuzzysets];
                    for (int g = 0; g < fuzzysets; g++) {
                        fuzzyGene[g] = hypotheses[cl-1][fuzzysets*a + r*attributes*fuzzysets + g + 1];
                    }
                    double ms = membership(a, train_set[x][a], fuzzyGene);
                    if (ms < min) min = ms;
                }
                min *= Q[cl-1][r];        // weight firing strength by rule quality
                if (min > max) max = min; // class membership = best weighted rule
            }
            mem[cl-1] = max;
        }
        // Sum the memberships of all classes other than the true one.
        for (int j = 1; j <= classes; j++)
            if (j != (int) train_set[x][attributes]) {
                others = others + mem[j-1];
            }
        own = mem[(int) train_set[x][attributes] - 1];
        if (eq == 13)
            strangeness[x] = others / (own + (double) gamma);
        else if (eq == 14)
            strangeness[x] = others - own * gamma;
        // else
        //     strangeness[x] = 1 - own;
    }

    int another_ctr = 0;
    for (int i = 0; i <= trainx; i++) {
        another_ctr++;
        if (strangeness[i] >= strangeness[trainx]) {
            ctr = ctr + 1;
        }
    }
    p = (double) ctr / (double) another_ctr;
    free(strangeness); // release the malloc'd buffer
    return p;
}
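// The final loop above implements the standard conformal p-value, with the last
// training row (index trainx) playing the role of the test example:
//     p = |{ i : alpha_i >= alpha_new }| / (n + 1)
// where alpha_i = strangeness[i], alpha_new = strangeness[trainx], and
// n + 1 = trainx + 1 is the number of scored examples.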
template <typename T, typename Func>
std::vector<size_t> bucketing(std::vector<T>& elements, Func key_func, size_t num_buckets) {
    // number of elements per bucket
    std::vector<size_t> send_counts(num_buckets, 0);
    // if there are no elements, return a 0 count for each bucket
    if (elements.size() == 0) return send_counts;
    // for each element, record which bucket it belongs to
    std::vector<long> membership(elements.size());
    for (size_t i = 0; i < elements.size(); ++i) {
        membership[i] = key_func(elements[i]);
        ++(send_counts[membership[i]]);
    }
    // at this point we have the target bucket for each element and the count for each bucket.
    // compute the offsets within the buffer
    std::vector<size_t> offset = send_counts;
    excl_prefix_sum(offset.begin(), offset.end());
    std::vector<size_t> maxes = offset;
    for (size_t i = 0; i < num_buckets; ++i) {
        maxes[i] += send_counts[i];
    }
    //== swap elements around.
    T val;
    size_t tar_pos, start_pos;
    long target;
    // the while loop stops under 2 conditions:
    //   1. we returned to the starting position (completed a cycle), or
    //   2. tar_pos is the current position.
    // either way we need a new starting point; instead of scanning the buffer, O(N),
    // search for incomplete buckets via offset, O(p).
    for (size_t i = 0; i < num_buckets;) {
        // determine the starting position.
        if (offset[i] == maxes[i]) {
            ++i;      // skip all completed buckets
            continue; // have the loop check the next bucket.
        }
        // get the start position.
        start_pos = offset[i];
        // set up the variable with the current entry.
        target = membership[start_pos];
        if (target > -1) {
            val = ::std::move(elements[start_pos]); // value to move
            membership[start_pos] = -2;             // special value marking where the cycle started.
            while (target > -1) { // if -1 or -2, then either visited or beginning of chain.
                tar_pos = offset[target]++;          // compute the new position. earlier offsets for the same bucket already hold final values.
                target = membership[tar_pos];        // save the info at tar_pos
                ::std::swap(val, elements[tar_pos]); // put val into the buffer at tar_pos, and pick up what was there
                membership[tar_pos] = -1;            // mark as visited.
            }
        } // else already visited, so done.
    }
    return send_counts;
}
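// Usage sketch: distribute integers into 4 buckets by value % 4. The lambda is a
// hypothetical key function; excl_prefix_sum is assumed from the same codebase.
void bucketing_example() {
    std::vector<int> data = {7, 2, 9, 4, 1, 8, 3, 6};
    std::vector<size_t> counts =
        bucketing(data, [](int v) { return static_cast<long>(v % 4); }, 4);
    // after the call, `data` is grouped bucket-by-bucket (all v%4==0 first,
    // then v%4==1, ...), and counts[b] holds the number of elements in bucket b.
}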
void vtree_user::match_list(pcl::PointCloud<PointT>::Ptr & point_cloud_in,
                            std::vector<std::pair<float,std::string> > & match_names,
                            std::vector<std::pair<float,std::string> > & cluster_match_names,
                            int num_match) {
    // Extract keypoints from the point cloud and compute their features
    ROS_INFO("Extracting keypoints and computing features! We have a cloud with %d points",
             static_cast<int>(point_cloud_in->size()));
    pcl::PointCloud<PointT>::Ptr keypoint_cloud(new pcl::PointCloud<PointT>);
    pcl::PointCloud<FeatureType>::Ptr feature_cloud(new pcl::PointCloud<FeatureType>);
    compute_features(point_cloud_in, keypoint_cloud, feature_cloud);
    int num_feat = feature_cloud->size();
    ROS_INFO("Done. %d features found", num_feat);
    if (num_feat == 0) {
        ROS_INFO("The feature cloud is empty");
        return;
    }

    // Rectify the histogram values to ensure they are in [0,100] and create a document
    vt::Document full_doc;
    for (pcl::PointCloud<FeatureType>::iterator iter = feature_cloud->begin();
         iter != feature_cloud->end(); ++iter) {
        rectify_histogram(*iter);
        full_doc.push_back(tree.quantize(FeatureHist(iter->histogram)));
    }

    // Cluster the keypoints in 3D
    ANNpointArray ann_points = NULL;
    if (enable_clustering)
        ann_points = annAllocPts(num_feat, 3);
    std::vector<KeypointExt*> extended_keypoints;
    for (int i = 0; i < num_feat; ++i) {
        if (enable_clustering) {
            ann_points[i][0] = keypoint_cloud->points[i].x;
            ann_points[i][1] = keypoint_cloud->points[i].y;
            ann_points[i][2] = keypoint_cloud->points[i].z;
        }
        extended_keypoints.push_back(new KeypointExt(feature_cloud->at(i), full_doc[i]));
    }
    int cluster_count = 0;
    std::vector<int> cluster_sizes;
    if (enable_clustering) {
        std::vector<int> membership(num_feat);
        cluster_count = pcd_utils::cluster_points(ann_points, num_feat, membership,
                                                  radius_adaptation_r_max, radius_adaptation_r_min,
                                                  radius_adaptation_A, radius_adaptation_K);
        cluster_sizes.resize(cluster_count, 0);
        for (int i = 0; i < num_feat; ++i) {
            extended_keypoints[i]->cluster = membership[i];
            ++cluster_sizes[membership[i]];
        }
        annDeallocPts(ann_points); // ANN point arrays must be freed with annDeallocPts, not delete[]
    }
    if (DEBUG) ROS_INFO_STREAM("Clusters found = " << cluster_count);

    //*******************************************************************
    // Obtain the matches from the database
    // (documents_map[matches[i].id]->name is the matched document's name,
    //  matches[i].score its score)
    vt::Matches matches;
    db->find(full_doc, num_match, matches);
    match_names.clear();
    for (vt::Matches::iterator it = matches.begin(); it != matches.end(); ++it)
        match_names.push_back(std::make_pair(it->score, documents_map[it->id]->name));

    if (enable_clustering) {
        // store in matches_map
        std::map<uint32_t, float> matches_map;
        for (vt::Matches::iterator it = matches.begin(); it != matches.end(); ++it) {
            matches_map[it->id] = it->score;
        }
        // Compute scores for each cluster and fold them into matches_map
        // (later clusters overwrite earlier scores for the same document)
        for (int c = 0; c < cluster_count; ++c) {
            vt::Document cluster_doc;
            vt::Matches cluster_matches;
            for (int i = 0; i < num_feat; ++i)
                if (extended_keypoints[i]->cluster == static_cast<unsigned int>(c))
                    cluster_doc.push_back(full_doc[i]);
            if (cluster_doc.size() < static_cast<unsigned int>(min_cluster_size))
                continue;
            db->find(cluster_doc, num_match, cluster_matches);
            if (DEBUG) ROS_INFO_STREAM("Cluster " << c << " (size = " << cluster_doc.size() << "):");
            for (vt::Matches::iterator it = cluster_matches.begin(); it != cluster_matches.end(); ++it) {
                matches_map[it->id] = it->score;
            }
        }
        // Get the updated match names
        cluster_match_names.clear();
        for (std::map<uint32_t, float>::iterator iter = matches_map.begin();
             iter != matches_map.end(); ++iter) {
            cluster_match_names.push_back(std::make_pair(iter->second, documents_map[iter->first]->name));
        }
        // sort
        std::sort(cluster_match_names.begin(), cluster_match_names.end());
    }

    // release the per-feature helpers allocated above
    for (size_t i = 0; i < extended_keypoints.size(); ++i)
        delete extended_keypoints[i];
}
util::Clustering::ClusterResult util::Clustering::_kmeans(unsigned k) {
    if (k == 0) {
        return ClusterResult();
    }
    const double threshold = 0.001;
    const unsigned maxIterations = 5000;
    srand((int) (*m_data.begin())); // deterministic seed derived from the data

    double delta = 0.0;
    unsigned iteration = 0;

    typedef std::vector<short> Membership;
    Membership membership(m_data.size(), -1);
    typedef std::vector<float> Distances;
    Distances distances(k, 0.0);

    // initialize each cluster position with a randomly chosen data point
    ClusterVector clusters;
    clusters.resize(k);
    for (unsigned i = 0; i < k; i++) {
        clusters[i].position = m_data[rand() % m_data.size()];
    }

    // clustering main loop
    do {
        delta = 0.0;
        for (unsigned i = 0; i < k; i++) {
            clusters[i].data.clear();
        }
        for (unsigned i = 0; i < m_data.size(); i++) {
            float value = m_data[i];
            // find the cluster at minimum distance to the data point
            float dist = 0.0, minDist = 1e20;
            unsigned minIdx = 0;
            for (unsigned j = 0; j < k; j++) {
                if ((dist = _distance(value, clusters[j].position)) < minDist) {
                    minDist = dist;
                    minIdx = j;
                }
            }
            clusters[minIdx].data.push_back(value);
            // check whether the membership changed
            if (membership[i] != static_cast<short>(minIdx)) {
                delta += 1.0;
                membership[i] = static_cast<short>(minIdx);
            }
        }
        for (unsigned i = 0; i < k; i++) {
            clusters[i].updatePosition();
        }
        delta /= m_data.size(); // fraction of points that switched clusters
        iteration++;
    } while (delta > threshold && iteration < maxIterations);

    // compute a score for the clustering
    for (unsigned i = 0; i < k; i++) {
        clusters[i].updateScore();
    }
    ClusterResult result;
    for (auto it = clusters.begin(); it != clusters.end(); ++it) {
        result.add(*it);
    }
    return result;
}
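// _distance() is defined elsewhere in the class; for the scalar data used here,
// a squared difference is the usual choice. A minimal sketch under that
// assumption (the actual signature is inferred from the call site above):
float util::Clustering::_distance(float a, float b) {
    float diff = a - b;
    return diff * diff; // squared Euclidean distance in 1D
}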
bool MembershipBuilder::readFile() {
    string data;
    ifstream datafile;
    datafile.open(fileName, ifstream::in);
    if (!datafile) {
        cout << fileName << " failed to open!" << endl;
        exit(-1);
    }
    getline(datafile, data, '\0'); // read the whole text file into one string

    string delimiter = "\n";
    char delim = ',';
    size_t pos = 0;
    size_t pos2 = 0;
    string line;
    string id, rank;
    double fee;
    vector<string> linesplit;
    // split the data string on the \n delimiter, then each line on commas
    while ((pos = data.find(delimiter)) != string::npos) {
        line = data.substr(0, pos);
        split(line, delim, linesplit);
        data.erase(0, pos + delimiter.length());
    }
    // every four fields form one record: id, rank, fee, access rights
    int n = 0;
    for (size_t i = 0; i < linesplit.size(); i++) {
        vector<string> accessRight;
        if (n == 4) {
            n = 0;
        }
        switch (n) {
        case 0:
            id = linesplit[i];
            break;
        case 1:
            rank = linesplit[i];
            break;
        case 2:
            fee = atof(linesplit[i].c_str());
            break;
        case 3:
            // the access-rights field holds colon-separated entries, one per \n chunk
            while ((pos2 = linesplit[i].find(delimiter)) != string::npos) {
                line = linesplit[i].substr(0, pos2);
                split(line, ':', accessRight);
                linesplit[i].erase(0, pos2 + delimiter.length());
            }
            break;
        }
        if (n == 3) {
            Membership membership(id, rank, fee, accessRight);
            vectorOfMembership.push_back(membership);
        }
        n++;
    }
    datafile.close();
    return true;
}
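// readFile() relies on a split() helper that is not part of this snippet. A
// minimal sketch of the assumed signature, appending tokens to `out` so that
// tokens accumulate across calls, as readFile() expects:
void split(const string& s, char delim, vector<string>& out) {
    stringstream ss(s);
    string token;
    while (getline(ss, token, delim)) {
        out.push_back(token);
    }
}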
void Kmeans::mic_e_step(const MatrixXdRowMajor& data_points) {
    int k = centroids.rows();
    int d = data_points.cols();
    double *host_centroids = centroids.data();
    double *host_membership = membership.data();
    int data_points_count = mic_stream_data_points_count;
    int start = 0;
    int size = mic_data_points_count * data_points.cols();
    double *mic_process_signal_a;
    double *mic_process_signal_b;
    double *mic_transfer_out_signal_a;
    double *mic_transfer_out_signal_b;
    #pragma omp parallel sections
    {
        // mic section
        #pragma omp section
        {
            // send the centroids to each mic
            for (int i = 0; i < mic_number_devices; i++) {
                #pragma offload_transfer target(mic:i) in(host_centroids[0:k*d]: REUSE into(mic_centroids[0:k*d]))
            }
            // main processing loop, double-buffered over the a/b streams
            for (int n = 0; n < mic_number_streamings; n++) {
                if (n % 2 == 0) {
                    // process a chunk of data_points
                    for (int i = 0; i < mic_number_devices; i++) {
                        #pragma offload target(mic:i) signal(mic_process_signal_a)\
                            in(mic_data_points:length(0) REUSE)\
                            in(mic_centroids:length(0) REUSE)\
                            in(mic_stream_membership_a:length(0) REUSE)
                        {
                            #pragma omp parallel for schedule(static)
                            for (int j = 0; j < data_points_count; j++) {
                                mic_stream_membership_a[j] =
                                    mic_find_nearest_centroid(j * d + mic_stream_a, mic_centroids, k, d);
                            }
                        }
                    }
                    // asynchronously transfer the results from each mic to the host
                    for (int i = 0; i < mic_number_devices; i++) {
                        #pragma offload_transfer target(mic:i) wait(mic_process_signal_a) signal(mic_transfer_out_signal_a)\
                            out(mic_stream_membership_a:length(data_points_count) REUSE into(host_membership[0:data_points_count]))
                        host_membership += data_points_count;
                    }
                } else {
                    // odd iterations presumably mirror the even case with the *_b signals/buffers
                }
            }
        }
        // host section: process the remaining points on the CPU in parallel
        #pragma omp section
        {
            #pragma omp parallel for schedule(static)
            for (int i = host_initial_data_point; i < data_points.rows(); i++) {
                membership(i) = find_nearest_centroid(data_points.row(i));
            }
        }
    }
}