/* Build a cluster for the index set idx[0..size) using the auxiliary
 * tree produced by build_help_cluster.
 *
 * cf    cluster geometry; only cf->dim is read directly here.
 * size  number of indices, has to be positive.
 * idx   index array handed on to new_cluster and build_help_cluster.
 * clf   index sets with at most clf elements become a single leaf.
 *
 * Returns the new cluster after update_cluster has been applied to it. */
pcluster
build_simsub_cluster(pclustergeometry cf, uint size, uint * idx, uint clf)
{
  pcluster  root;
  pcluster  aux;
  uint      count;

  assert(size > 0);

  if (size <= clf) {
    /* small index set: a single cluster without sons */
    root = new_cluster(size, idx, 0, cf->dim);
    update_support_bbox_cluster(cf, root);
  }
  else {
    /* build the auxiliary tree and count its leaves */
    count = 0;
    aux = build_help_cluster(cf, idx, size, clf, 0, &count);

    /* the root gets one son per leaf of the auxiliary tree */
    root = new_cluster(size, idx, count, cf->dim);

    /* the counter is reset before being handed to leaves_into_sons;
     * presumably that routine uses it as the running son position
     * while filling root from aux -- TODO confirm */
    count = 0;
    leaves_into_sons(cf, clf, aux, root, &count);

    update_bbox_cluster(root);
  }

  update_cluster(root);

  return root;
}
// Assigns a phase id to `signature`.
//
// The nearest existing cluster is looked up with FindNearestCluster. On a
// miss (-1) a new cluster is created from the signature and appended. On a
// hit the signature is merged into that cluster and its count is bumped.
// In both cases a cluster whose count has reached `transition_threshold`
// is given the next free phase id; otherwise it carries
// TRANSITION_PHASE_ID. Returns the phase of the affected cluster.
int UnboundedLeaderFollowerClassifier::ClassifySignature(
    const spi::util::Signature& signature) {
  const int idx =
      FindNearestCluster(clusters, signature, similarity_threshold);

  // No cluster is similar enough: start a new one.
  if (idx == -1) {
    Cluster fresh(signature);

    if (fresh.count >= transition_threshold) {
      // Already stable: hand out a phase id of its own.
      fresh.phase = next_phase_id;
      next_phase_id++;
    } else {
      fresh.phase = TRANSITION_PHASE_ID;
    }

    clusters.push_back(fresh);
    return fresh.phase;
  }

  // A similar cluster exists: fold the signature into it.
  Cluster& match = clusters[idx];

  // The merge weight uses the count from before the increment.
  match.signature.Merge(signature, 1.0 / match.count);
  ++match.count;

  // Promote a transition cluster once it has been seen often enough.
  const bool still_transition = (match.phase == TRANSITION_PHASE_ID);
  if (still_transition && match.count >= transition_threshold) {
    match.phase = next_phase_id;
    next_phase_id++;
  }

  return match.phase;
}
void db::new_term(const string& term) { throw_if_reader(); cluster * first_cluster = new_cluster(); if (first_cluster == NULL) //New file. { first_cluster = new_cluster(); } uint pos = ((char*)first_cluster) - m_work_file; uint cur = ((char*)&(first_cluster->records)) - m_work_file; first_cluster->term = 0; term_desc desc; desc.cur = cur; desc.pos = pos; m_descriptors->insert(pair<string, term_desc>(term, desc)); }
void db::add(const string & term_c, int doc_id, int freq) { throw_if_reader(); string term = term_c; fancy_term(term); auto descriptor = m_descriptors->find(term); if (descriptor == m_descriptors->end()) { new_term(term); } descriptor = m_descriptors->find(term); uint cur = descriptor->second.cur; uint cur_in_clusters = cur - sizeof(file_header); //uint idx = cur_in_clusters % sizeof(cluster); cluster * added_cluster; cluster * cur_cluster = (cluster*)(((char*)m_cluster_space) + cur_in_clusters / sizeof(cluster) * sizeof(cluster)); if (cur_cluster->cur_pos == CLUSTER_SIZE_RECORDS - 1) { //cluster is complete added_cluster = new_cluster(); if (added_cluster == NULL) { add(term_c, doc_id, freq); return; //New file - new rules. } cur_cluster->next_cluster = ((char*)added_cluster) - m_work_file; //cur_in_clusters = ((char*)&(write_to->records)) - m_work_file; } term_record rec; rec.doc_id = doc_id; rec.freq = freq; record(cur, rec); descriptor->second.cur += sizeof(term_record); if (cur_cluster->next_cluster != (uint)(-1)) { descriptor->second.cur = ((char*)&added_cluster->records) - m_work_file; } cur_cluster->cur_pos++; //cout << "[" << m_worker_id << "] Term " << term << " in doc " << doc_id // << " with freq " << freq << "\n"; }
/* Extend the cluster tree below t by up to depth further levels.
 *
 * While depth is positive, a cluster that has sons passes the request on
 * to each of them with depth - 1; a cluster without sons receives exactly
 * one son that shares its size, index array, dimension and bounding box,
 * and that son is extended with depth - 1.
 * update_cluster(t) is called in every case, including depth == 0. */
void
extend_cluster(pcluster t, uint depth)
{
  pcluster  child;
  uint      k;

  if (depth > 0) {
    if (t->son) {
      /* inner cluster: descend into the existing sons */
      for (k = 0; k < t->sons; k++) {
        extend_cluster(t->son[k], depth - 1);
      }
    }
    else {
      /* no sons yet: append a single son mirroring t */
      t->sons = 1;
      t->son = (pcluster *) allocmem(sizeof(pcluster));

      child = new_cluster(t->size, t->idx, 0, t->dim);
      t->son[0] = child;

      for (k = 0; k < t->dim; k++) {
        child->bmin[k] = t->bmin[k];
        child->bmax[k] = t->bmax[k];
      }

      extend_cluster(child, depth - 1);
    }
  }

  update_cluster(t);
}
void FoF::find_friends (const Zbin &zbin, Galaxy &gal, double rfriend) { //! Function to find galaxies linked to the galaxy in question. /**< Loop through kd-tree nodes */ for(int j = 0; j < tree.node_list.size(); j++) { /**< Check if galaxy is compatible with kd-tree node */ if(node_check(gal, tree.node_list[j], rfriend)) { /**< Loop through node members */ for(int k = 0; k < tree.node_list[j].members.size(); k++) { /**< Check if galaxies are friends */ int gal_now = tree.node_list[j].members[k].num; if(friendship(zbin, gal, gal_list[gal_now], rfriend)) { /**< Create new cluster */ if(!gal.in_cluster[zbin.num]) new_cluster(zbin, gal, gal_list[gal_now]); /**< Add new member to existing cluster */ else add_member(zbin, gal_list[gal_now], list_of_clusters[cluster_count]); } } // end of node member loop } } // end of node loop }
/* Build a cluster tree for idx[0..size) by adaptive bisection.
 *
 * For index sets larger than clf, the box cf->hmin/cf->hmax is refreshed
 * via update_point_bbox_clustergeometry, the direction with the largest
 * extent is chosen, and idx is reordered in place so that all points with
 * a coordinate below the midpoint come first. If both halves are
 * non-empty, they are built recursively as two sons. In all other cases
 * (small index set, zero extent, or a one-sided split) the result is a
 * cluster without sons.
 *
 * cf    cluster geometry (coordinates cf->x, box cf->hmin/cf->hmax).
 * size  number of indices, has to be positive.
 * idx   index array, permuted in place.
 * clf   index sets with at most clf elements are not split.
 *
 * Returns the new cluster after update_cluster has been applied to it. */
pcluster
build_adaptive_cluster(pclustergeometry cf, uint size, uint * idx, uint clf)
{
  pcluster  t;
  uint      direction;
  uint      size0, size1;
  uint      i, j;
  real      a, m;

  assert(size > 0);

  if (size > clf) {
    update_point_bbox_clustergeometry(cf, size, idx);

    /* compute the direction of partition: largest box extent wins */
    direction = 0;
    a = cf->hmax[0] - cf->hmin[0];
    for (j = 1; j < cf->dim; j++) {
      m = cf->hmax[j] - cf->hmin[j];
      if (a < m) {
        a = m;
        direction = j;
      }
    }

    if (a > 0.0) {
      /* move all points below the midpoint to the front of idx */
      m = (cf->hmax[direction] + cf->hmin[direction]) / 2.0;
      size0 = 0;
      size1 = 0;
      for (i = 0; i < size; i++) {
        if (cf->x[idx[i]][direction] < m) {
          j = idx[i];
          idx[i] = idx[size0];
          idx[size0] = j;
          size0++;
        }
        else {
          size1++;
        }
      }

      /* build sons */
      if (size0 > 0 && size1 > 0) {
        /* both sons are not empty */
        t = new_cluster(size, idx, 2, cf->dim);
        t->son[0] = build_adaptive_cluster(cf, size0, idx, clf);
        t->son[1] = build_adaptive_cluster(cf, size1, idx + size0, clf);
        update_bbox_cluster(t);
      }
      else {
        /* Only one side is not empty, so the cluster is created without
         * sons. Every other son-less cluster in this routine takes its
         * bounding box from update_support_bbox_cluster; do the same
         * here instead of calling update_bbox_cluster on a cluster that
         * has no sons. */
        assert(size0 == size || size1 == size);
        t = new_cluster(size, idx, 0, cf->dim);
        update_support_bbox_cluster(cf, t);
      }
    }
    else {
      /* no extent in any direction: nothing to split */
      assert(a == 0.0);
      t = new_cluster(size, idx, 0, cf->dim);
      update_support_bbox_cluster(cf, t);
    }
  }
  else {
    /* small index set: cluster without sons */
    t = new_cluster(size, idx, 0, cf->dim);
    update_support_bbox_cluster(cf, t);
  }

  update_cluster(t);

  return t;
}
/* Build a cluster tree for idx[0..size) by principal component analysis.
 *
 * For index sets larger than clf, the weighted center of mass and the
 * weighted covariance matrix of the points are computed (weights cf->w,
 * coordinates cf->x). The points are then separated by the plane through
 * the center whose normal is the last column of the eigenvector matrix
 * returned by eig_amatrix. idx is reordered in place so that points with
 * a non-negative projection come first. If both sides are non-empty,
 * they are built recursively as two sons; otherwise the cluster is
 * created without sons.
 *
 * cf    cluster geometry.
 * size  number of indices, has to be positive.
 * idx   index array, permuted in place.
 * clf   index sets with at most clf elements are not split.
 *
 * Returns the new cluster after update_cluster has been applied to it. */
pcluster
build_pca_cluster(pclustergeometry cf, uint size, uint * idx, uint clf)
{
  const uint dim = cf->dim;

  pamatrix  C, Q;
  pavector  v;
  prealavector lambda;
  real     *x, *y;
  real      w;
  uint      i, j, k, size0, size1;
  pcluster  t;

  assert(size > 0);

  size0 = 0;
  size1 = 0;

  if (size > clf) {
    x = allocreal(dim);
    y = allocreal(dim);

    /* determine weight of current cluster */
    w = 0.0;
    for (i = 0; i < size; ++i) {
      w += cf->w[idx[i]];
    }
    w = 1.0 / w;

    for (j = 0; j < dim; ++j) {
      x[j] = 0.0;
    }

    /* determine center of mass */
    for (i = 0; i < size; ++i) {
      for (j = 0; j < dim; ++j) {
        x[j] += cf->w[idx[i]] * cf->x[idx[i]][j];
      }
    }
    for (j = 0; j < dim; ++j) {
      x[j] *= w;
    }

    C = new_zero_amatrix(dim, dim);
    Q = new_zero_amatrix(dim, dim);
    lambda = new_realavector(dim);

    /* setup covariance matrix */
    for (i = 0; i < size; ++i) {
      for (j = 0; j < dim; ++j) {
        y[j] = cf->x[idx[i]][j] - x[j];
      }
      for (j = 0; j < dim; ++j) {
        for (k = 0; k < dim; ++k) {
          C->a[j + k * C->ld] += cf->w[idx[i]] * y[j] * y[k];
        }
      }
    }

    /* get eigenvalues and eigenvectors of covariance matrix */
    eig_amatrix(C, lambda, Q);

    /* get eigenvector from largest eigenvalue */
    v = new_avector(0);
    init_column_avector(v, Q, dim - 1);

    /* separate cluster with v as separation-plane */
    for (i = 0; i < size; ++i) {
      /* x_i - X */
      for (j = 0; j < dim; ++j) {
        y[j] = cf->x[idx[i]][j] - x[j];
      }

      /* <y,v> */
      w = 0.0;
      for (j = 0; j < dim; ++j) {
        w += y[j] * v->v[j];
      }

      if (w >= 0.0) {
        j = idx[i];
        idx[i] = idx[size0];
        idx[size0] = j;
        size0++;
      }
      else {
        size1++;
      }
    }

    assert(size0 + size1 == size);

    /* all temporaries are released before descending */
    del_amatrix(Q);
    del_amatrix(C);
    del_realavector(lambda);
    del_avector(v);
    freemem(x);
    freemem(y);

    /* recursion */
    if (size0 > 0 && size1 > 0) {
      t = new_cluster(size, idx, 2, cf->dim);
      t->son[0] = build_pca_cluster(cf, size0, idx, clf);
      t->son[1] = build_pca_cluster(cf, size1, idx + size0, clf);
      update_bbox_cluster(t);
    }
    else {
      /* The plane left one side empty (e.g. all points coincide). The
       * order of idx is unchanged in that case, so a recursive call with
       * the full size would repeat exactly this computation and never
       * terminate. Stop here with a cluster without sons instead. */
      t = new_cluster(size, idx, 0, cf->dim);
      update_support_bbox_cluster(cf, t);
    }
  }
  else {
    /* small index set: cluster without sons */
    t = new_cluster(size, idx, 0, cf->dim);
    update_support_bbox_cluster(cf, t);
  }

  update_cluster(t);

  return t;
}
/* auxiliary routine for build_simsub_cluster
 *
 * Bisects idx[0..size) once per coordinate direction, starting with
 * `direction` and going up to cf->dim - 1. The box cf->hmin/cf->hmax is
 * narrowed to the respective half for the duration of each recursive
 * call and restored afterwards. Once all directions are used up, a
 * cluster without sons is created, its bounding box is copied from the
 * current cf->hmin/cf->hmax, and leaves[0] is incremented.
 * clf is only passed on to the recursive calls. */
static pcluster
build_help_cluster(pclustergeometry cf, uint * idx, uint size, uint clf,
                   uint direction, uint * leaves)
{
  pcluster  node;
  uint      lower, upper;
  uint      k, tmp;
  real      old_min, old_max, extent, mid;

  /* every direction has been handled: this is a leaf */
  if (direction >= cf->dim) {
    node = new_cluster(size, idx, 0, cf->dim);
    for (k = 0; k < cf->dim; k++) {
      node->bmin[k] = cf->hmin[k];
      node->bmax[k] = cf->hmax[k];
    }
    leaves[0]++;
    return node;
  }

  extent = cf->hmax[direction] - cf->hmin[direction];

  /* box is flat in this direction: hand everything to a single son */
  if (!(extent > 0.0)) {
    assert(extent == 0.0);
    node = new_cluster(size, idx, 1, cf->dim);
    node->son[0] =
      build_help_cluster(cf, idx, size, clf, direction + 1, leaves);
    return node;
  }

  /* move all points below the midpoint to the front of idx */
  mid = (cf->hmax[direction] + cf->hmin[direction]) / 2.0;
  lower = 0;
  for (k = 0; k < size; k++) {
    if (cf->x[idx[k]][direction] < mid) {
      tmp = idx[k];
      idx[k] = idx[lower];
      idx[lower] = tmp;
      lower++;
    }
  }
  upper = size - lower;

  /* remember the bounds that get narrowed below */
  old_min = cf->hmin[direction];
  old_max = cf->hmax[direction];

  if (lower > 0 && upper > 0) {
    /* both sons are not empty */
    node = new_cluster(size, idx, 2, cf->dim);

    cf->hmax[direction] = mid;
    node->son[0] =
      build_help_cluster(cf, idx, lower, clf, direction + 1, leaves);
    cf->hmax[direction] = old_max;

    cf->hmin[direction] = mid;
    node->son[1] =
      build_help_cluster(cf, idx + lower, upper, clf, direction + 1, leaves);
    cf->hmin[direction] = old_min;
  }
  else if (lower > 0) {
    /* only the first son is not empty */
    node = new_cluster(size, idx, 1, cf->dim);

    cf->hmax[direction] = mid;
    node->son[0] =
      build_help_cluster(cf, idx, size, clf, direction + 1, leaves);
    cf->hmax[direction] = old_max;
  }
  else {
    /* only the second son is not empty */
    assert(upper > 0);
    node = new_cluster(size, idx, 1, cf->dim);

    cf->hmin[direction] = mid;
    node->son[0] =
      build_help_cluster(cf, idx, size, clf, direction + 1, leaves);
    cf->hmin[direction] = old_min;
  }

  return node;
}
/* Build a cluster tree for idx[0..size) by regular bisection.
 *
 * For index sets larger than clf, the box cf->hmin/cf->hmax is refreshed
 * via update_point_bbox_clustergeometry and bisected at the midpoint of
 * `direction`; idx is reordered in place so that points below the
 * midpoint come first. The sons are built with the next direction
 * (cyclically), with the box narrowed to the respective half during each
 * recursive call and restored afterwards. If one half is empty or the
 * box is flat in `direction`, a single son covering the whole index set
 * is built with the next direction. If the box is flat in every
 * direction, the cluster is created without sons.
 *
 * cf         cluster geometry.
 * size       number of indices, has to be positive.
 * idx        index array, permuted in place.
 * clf        index sets with at most clf elements are not split.
 * direction  coordinate direction used for this bisection.
 *
 * Returns the new cluster after update_cluster has been applied to it. */
pcluster
build_regular_cluster(pclustergeometry cf, uint size, uint * idx, uint clf,
                      uint direction)
{
  pcluster  t;
  uint      newd;
  uint      size0, size1;
  uint      i, j;
  real      a, b, m;

  assert(size > 0);

  if (size > clf) {
    size0 = 0;
    size1 = 0;

    update_point_bbox_clustergeometry(cf, size, idx);

    /* direction for the sons, wrapping around after the last one */
    if (direction < cf->dim - 1) {
      newd = direction + 1;
    }
    else {
      newd = 0;
    }

    m = cf->hmax[direction] - cf->hmin[direction];

    if (m > 0.0) {
      /* move all points below the midpoint to the front of idx */
      m = (cf->hmax[direction] + cf->hmin[direction]) / 2.0;
      for (i = 0; i < size; i++) {
        if (cf->x[idx[i]][direction] < m) {
          j = idx[i];
          idx[i] = idx[size0];
          idx[size0] = j;
          size0++;
        }
        else {
          size1++;
        }
      }

      /* build sons */
      if (size0 > 0) {
        if (size1 > 0) {
          /* both sons are not empty */
          t = new_cluster(size, idx, 2, cf->dim);

          a = cf->hmin[direction];
          b = cf->hmax[direction];

          cf->hmax[direction] = m;
          t->son[0] = build_regular_cluster(cf, size0, idx, clf, newd);
          cf->hmax[direction] = b;

          cf->hmin[direction] = m;
          t->son[1] =
            build_regular_cluster(cf, size1, idx + size0, clf, newd);
          cf->hmin[direction] = a;

          update_bbox_cluster(t);
        }
        else {
          /* only the first son is not empty */
          t = new_cluster(size, idx, 1, cf->dim);

          b = cf->hmax[direction];
          cf->hmax[direction] = m;
          t->son[0] = build_regular_cluster(cf, size, idx, clf, newd);
          cf->hmax[direction] = b;

          update_bbox_cluster(t);
        }
      }
      else {
        /* only the second son is not empty */
        assert(size1 > 0);
        t = new_cluster(size, idx, 1, cf->dim);

        a = cf->hmin[direction];
        cf->hmin[direction] = m;
        t->son[0] = build_regular_cluster(cf, size, idx, clf, newd);
        cf->hmin[direction] = a;

        update_bbox_cluster(t);
      }
    }
    else {
      assert(m == 0.0);

      /* The box is flat in this direction. Check whether any direction
       * is left that could still separate the points. */
      for (j = 0; j < cf->dim; j++) {
        if (cf->hmax[j] - cf->hmin[j] > 0.0) {
          break;
        }
      }

      if (j < cf->dim) {
        /* some direction has positive extent: try the next one */
        t = new_cluster(size, idx, 1, cf->dim);
        t->son[0] = build_regular_cluster(cf, size, idx, clf, newd);
        update_bbox_cluster(t);
      }
      else {
        /* The box is flat in every direction. The recursive call would
         * receive the same size and idx, and (assuming
         * update_point_bbox_clustergeometry yields the same box for the
         * same index set -- its definition is not visible here) end up
         * in this branch again without ever terminating. Create a
         * cluster without sons instead. */
        t = new_cluster(size, idx, 0, cf->dim);
        update_support_bbox_cluster(cf, t);
      }
    }
  }
  else {
    /* small index set: cluster without sons */
    t = new_cluster(size, idx, 0, cf->dim);
    update_support_bbox_cluster(cf, t);
  }

  update_cluster(t);

  return t;
}
/* Read one cluster and, recursively, all of its descendants from an open
 * NetCDF file.
 *
 * nc_file     NetCDF file handle.
 * clusters    not used in this routine, only passed on to the recursion.
 * coeffs      not used in this routine, only passed on to the recursion.
 * nc_sons     variable id holding the number of sons per cluster.
 * nc_size     variable id holding the size per cluster.
 * nc_coeff    variable id holding the bounding box coefficients.
 * idx         index array for this cluster; each son receives the part
 *             of the array following its preceding siblings.
 * dim         number of coefficients per bounding box corner.
 * clusteridx  position of the next cluster record; advanced by one for
 *             every cluster read.
 * coeffidx    position of the next coefficient; advanced by 2 * dim for
 *             every cluster read.
 *
 * Records are stored with the parent's son count and size first,
 * followed by its sons; bounding boxes are stored in the order in which
 * the clusters are completed (sons before their parent).
 *
 * Returns the cluster after update_cluster has been applied to it. */
static pcluster
read_cdf_part(int nc_file, size_t clusters, size_t coeffs,
              int nc_sons, int nc_size, int nc_coeff,
              uint * idx, int dim, size_t * clusteridx, size_t * coeffidx)
{
  pcluster  t, t1;
  uint     *idx1;
  uint      size;
  uint      sons;
  uint      i;
  size_t    start, count;
  ptrdiff_t stride;
  int       val, result;

  /* Get number of sons */
  start = *clusteridx;
  count = 1;
  stride = 1;
  result = nc_get_vars(nc_file, nc_sons, &start, &count, &stride, &val);
  assert(result == NC_NOERR);
  sons = val;

  /* Get size of cluster */
  result = nc_get_vars(nc_file, nc_size, &start, &count, &stride, &val);
  assert(result == NC_NOERR);
  size = val;

  /* Create new cluster */
  t = new_cluster(size, idx, sons, dim);

  /* Increase cluster index */
  (*clusteridx)++;

  /* Handle sons */
  if (sons > 0) {
    idx1 = idx;
    for (i = 0; i < sons; i++) {
      t1 = read_cdf_part(nc_file, clusters, coeffs, nc_sons, nc_size,
                         nc_coeff, idx1, dim, clusteridx, coeffidx);
      t->son[i] = t1;

      idx1 += t1->size;
    }
    /* the sons have to cover the index set of t exactly */
    assert(idx1 == idx + size);
  }

  /* Get bounding box: dim minima followed by dim maxima */
  start = (*coeffidx);
  count = dim;
  result = nc_get_vars(nc_file, nc_coeff, &start, &count, &stride, t->bmin);
  /* check this read like the ones above instead of dropping the status */
  assert(result == NC_NOERR);
  start += dim;

  result = nc_get_vars(nc_file, nc_coeff, &start, &count, &stride, t->bmax);
  assert(result == NC_NOERR);
  start += dim;
  (*coeffidx) = start;

  /* Finish initialization */
  update_cluster(t);

  return t;
}