/**
 * Registers `worker` in the worker set (p_set) of the UF set that contains
 * `state`, and reports what was known about that set beforehand.
 *
 * The worker is represented by the single bit (1ULL << worker) in p_set.
 *
 * returns:
 * - CLAIM_FIRST   : if we initialized the state (the p_set value obtained
 *                   from the first fetch_or was 0)
 * - CLAIM_FOUND   : if the state is LIVE and we have visited its SCC before
 * - CLAIM_SUCCESS : if the state is LIVE and we have not yet visited its SCC
 * - CLAIM_DEAD    : if the state is part of a completed SCC
 */
char
uf_make_claim (const uf_t *uf, ref_t state, size_t worker)
{
    // the worker index must fit in the p_set bitmask
    HREassert (worker < WORKER_BITS);
    sz_w w_id = 1ULL << worker;
    ref_t f = uf_find (uf, state);
    sz_w orig_pset;

    // is the state dead?
    if (atomic_read (&uf->array[f].uf_status) == UF_DEAD)
        return CLAIM_DEAD;

    // did we previously explore a state in this SCC?
    if ( (atomic_read (&uf->array[f].p_set) & w_id ) != 0) {
        return CLAIM_FOUND;
        // NB: cycle is possibly missed (in case f got updated)
        // - however, next iteration should detect this
    }

    // Add our worker ID to the set, and ensure it is the UF representative.
    // orig_pset is the result of the first fetch_or only (presumably the
    // value of p_set before our bit was OR-ed in -- confirm against the
    // fetch_or definition).
    orig_pset = fetch_or (&uf->array[f].p_set, w_id);
    // A non-zero parent means f is no longer a representative (uf_find
    // returns the entry whose parent is 0), so follow the chain and set
    // our bit on the new representative as well.
    while ( atomic_read (&uf->array[f].parent) != 0 ) {
        f = uf_find (uf, f);
        fetch_or (&uf->array[f].p_set, w_id);
    }

    if (orig_pset == 0ULL)
        return CLAIM_FIRST;
    else
        return CLAIM_SUCCESS;
}
/** Function update_uf()
 *
 * Given fragment IDs that belong to the same cluster, update the union-find
 * structure [uf_clst] so that all of them end up in one set.
 *
 * Every fragment is attached to the root of the first fragment in
 * [clusters]. This needs one find per fragment; the earlier all-pairs
 * formulation produced the same partition with a quadratic number of finds
 * and redundant self-assignments.
 *
 * @param uf_clst   union-find parent array, modified in place
 * @param clusters  fragment IDs to be merged; fewer than two entries is a no-op
 */
void update_uf (ivec_t& uf_clst, const ivec_t& clusters)
{
    const int sz = clusters.size ();
    if (sz < 2) return; // nothing to merge

    // The root of the first fragment never receives a new parent below,
    // so it stays the root of the merged set for the whole loop.
    const int anchor = uf_find (clusters[0], uf_clst);
    for (int k = 1; k < sz; ++ k) {
        const int root_k = uf_find (clusters[k], uf_clst);
        if (root_k != anchor) uf_clst[root_k] = anchor;
    }
} // update_uf
/**
 * @brief Unites two components.
 * @details Unites the components the nodes `u` and `v` belong to by making
 *          the representative of `v` the parent of the representative of `u`.
 *          If `uf` is NULL, an error is printed to stderr and the process
 *          terminates with a failure status.
 * @param uf Pointer to initialized uf data structure.
 * @param u Node that belongs to the first component.
 * @param v Node that belongs to the second component.
 */
void uf_union(uf_t *uf, uf_node_t u, uf_node_t v)
{
    if(uf == NULL)
    {
        fprintf(stderr, "Error: null pointer in uf_union\n");
        /* A fatal error must not be reported to the caller as success. */
        exit(EXIT_FAILURE);
    }

    /* Work on the representatives of both nodes. */
    u = uf_find(uf, u);
    v = uf_find(uf, v);

    /* Already in the same component: nothing to do. */
    if(u != v)
        uf->parents[u] = v;
}
/* Merge the two trees (sets) that contain the elements p and q. */
void uf_union(uf_t *t,int p,int q){
    /* uf_find yields the index of the root, not the id value stored there. */
    const int root_p = uf_find(t, p);
    const int root_q = uf_find(t, q);

    if (root_p != root_q) {
        /* A negative id marks a root; its negation is the number of nodes
         * in that tree. The tree with the larger id is therefore the one
         * with fewer nodes, and it gets attached below the other root. */
        int keep = root_p;
        int drop = root_q;
        if (t->id[root_p] > t->id[root_q]) {
            keep = root_q;
            drop = root_p;
        }

        t->id[keep] += t->id[drop];   /* accumulate the node count */
        t->id[drop] = keep;           /* re-parent the smaller tree */
        t->count--;                   /* one set fewer */
    }
    /* Otherwise both elements already share a set: nothing to merge. */
}
void uf_union(int a, int b, uf &in) { a = uf_find(a, in); b = uf_find(b, in); if (a == b) return; if (in[a].second < in[b].second) in[a].first = b; else if (in[a].second > in[b].second) in[b].first = a; else { in[a].first = b; in[b].second++; } }
// Returns the root of the set containing `loc`. A root is an entry whose
// `.first` field refers to itself. Every entry on the path from `loc` to the
// root is re-pointed directly at the root (path compression).
int uf_find(int loc, uf &in) {
    // Pass 1: walk up to the root.
    int root = loc;
    while (in[root].first != root)
        root = in[root].first;

    // Pass 2: re-point every entry on the path at the root.
    while (loc != root) {
        const int above = in[loc].first;
        in[loc].first = root;
        loc = above;
    }
    return root;
}
/*
 * Builds and returns a new non-oriented graph from G by processing the arcs
 * of G in the order delivered by a HEAP_GET_MIN heap keyed with pesoArco.
 * An arc is copied into the result (together with both of its end nodes)
 * whenever the three-argument uf_find() reports a non-zero value for its end
 * points; the end points are then merged with uf_unionFind().
 *
 * NOTE(review): presumably uf_find(uf, a, b) is non-zero when a and b are in
 * different sets -- confirm against the union-find module.
 * TODO (from the original author): what about oriented graphs?
 */
graph kruskal(graph G, float (*pesoArco )(void *)) {
    const int nodeCount = graphCountNodes(G);
    graph forest = graphInit(nodeCount, GRAPH_IS_NOT_ORIENTED);
    uf_handler sets = uf_init(graphGetMaxNodes(G));
    coda pending = graphGetAllArchs(G);
    heap byWeight = heapInit(nodeCount, pesoArco, HEAP_GET_MIN);
    archInfo edge;

    /* Move every arc from the queue into the heap. */
    for (edge = codaGet(pending); edge != NULL; edge = codaGet(pending)) {
        heapInsert(byWeight, edge);
    }

    /* Take the arcs back out in heap order and keep the accepted ones. */
    for (edge = heapExtract(byWeight); edge != NULL; edge = heapExtract(byWeight)) {
        const int src = edge->fromNode;
        const int dst = edge->toNode;

        if (!uf_find(sets, src, dst)) {
            continue;
        }

        uf_unionFind(sets, src, dst);
        graphAddNode(forest, src, edge->fromInfo);
        graphAddNode(forest, dst, edge->toInfo);
        graphAddArch(forest, src, dst, edge->archInfo);
    }

    return forest;
}
/**
 * Reports whether a and b currently reside in the same UF set.
 *
 * The representatives of both states are compared. If they differ while the
 * representative found for a has meanwhile received a parent, the comparison
 * is repeated starting from the two representatives.
 */
bool
uf_sameset (const uf_t *uf, ref_t a, ref_t b)
{
    // TODO: try to improve performance (if necessary)
    for (;;) {
        const ref_t rep_a = uf_find (uf, a);
        const ref_t rep_b = uf_find (uf, b);

        // identical representatives: same set
        if (rep_a == rep_b)
            return 1;

        // rep_a is still a representative (parent == 0): different sets
        if (atomic_read (&uf->array[rep_a].parent) == 0)
            return 0;

        // rep_a got a parent in the meantime: retry from the representatives
        a = rep_a;
        b = rep_b;
    }
}
// Returns the new leader (uf_name) // Stupid question: Is there any problems that arise // when uf_union is called multiple times on the same objects? // I don't think so, right? uf_name uf_union(uf_object *obj1, uf_object *obj2) { uf_name class1 = uf_find(obj1); uf_name class2 = uf_find(obj2); // Union-by-rank: // If class1 == class2, then obj1 and obj2 are already // in the same set so don't do anything! (Is this correct?) if (class1 == class2) { return class1; } if(class1->rank < class2->rank) { class1->parent = class2; return class2; } else { class2->parent = class1; if(class1->rank == class2->rank) { (class1->rank)++; } return class1; } }
/**
 * set the UF status for the representative of state to DEAD
 *
 * Does not return before the status of the representative reads UF_DEAD.
 * While the status is neither UF_DEAD nor UF_LIVE the loop just re-reads it
 * (presumably waiting for another worker to finish a temporary status such
 * as a lock -- confirm against the uf_status definition).
 *
 * returns the result of this call's cas from UF_LIVE to UF_DEAD (presumably
 * true iff this call performed the transition); false if no cas was
 * attempted because the status was already DEAD.
 */
bool
uf_mark_dead (const uf_t *uf, ref_t state)
{
    bool result = false;
    ref_t f = uf_find (uf, state);
    uf_status status = atomic_read (&uf->array[f].uf_status);

    while ( status != UF_DEAD ) {
        // only attempt the transition from LIVE; any other status is
        // re-read until it becomes LIVE or DEAD
        if (status == UF_LIVE)
            result = cas (&uf->array[f].uf_status, UF_LIVE, UF_DEAD);
        status = atomic_read (&uf->array[f].uf_status);
    }

    // f must still be a representative (parent == 0) once it is DEAD
    HREassert (atomic_read (&uf->array[f].parent) == 0,
               "the parent of a DEAD representative should not change");
    HREassert (uf_is_dead (uf, state), "state should be dead");

    return result;
}
/**
 * Looks up the representative of the UF set that contains state.
 *
 * An entry whose parent field is 0 is a representative. On the way back
 * from the recursion, state is re-pointed at the representative whenever its
 * current parent is not already that representative (path compression).
 */
ref_t
uf_find (const uf_t *uf, ref_t state)
{
    //HREassert (state != 0);
    const ref_t up = atomic_read (&uf->array[state].parent);

    if (up != 0) {
        // not a representative: resolve the parent first
        const ref_t rep = uf_find (uf, up);

        // shorten the path, but skip the write if nothing would change
        if (rep != up)
            atomic_write (&uf->array[state].parent, rep);

        return rep;
    }

    // no parent: state represents its own set
    return state;
}
/**
 * unites the acceptance set of the uf representative with acc (via logical OR)
 * returns the new acceptance set for the uf representative
 *
 * If acc is 0 nothing is written and the current acceptance set is returned.
 * Otherwise the OR is applied to the representative found for state and the
 * step is repeated for as long as that representative turns out to have
 * received a parent after the update.
 */
uint32_t
uf_add_acc (const uf_t *uf, ref_t state, uint32_t acc)
{
    // just return the acceptance set if nothing is added
    if (acc == 0)
        return uf_get_acc (uf, state);

    ref_t r;
    uint32_t r_acc;

    do {
        r = uf_find (uf, state);
        r_acc = atomic_read (&uf->array[r].acc_set);

        // only unite if it updates the acceptance set
        // (all bits of acc are already present: skip the atomic write)
        if ( (r_acc | acc) == r_acc)
            return r_acc;

        // update! (or_fetch presumably yields the value after the OR --
        // confirm against its definition)
        r_acc = or_fetch (&uf->array[r].acc_set, acc);

    // a non-zero parent means r is no longer a representative, so the
    // update has to be repeated on the current representative
    } while (atomic_read (&uf->array[r].parent) != 0);

    return r_acc;
}
/**
 * Reads the acceptance set stored at the representative of the UF set that
 * contains state.
 */
uint32_t
uf_get_acc (const uf_t *uf, ref_t state)
{
    const ref_t rep = uf_find (uf, state);
    const uint32_t acc = atomic_read (&uf->array[rep].acc_set);

    return acc;
}
/**
 * Checks whether the representative of state carries the status UF_DEAD.
 *
 * A result of 1 guarantees DEAD; a result of 0 gives no guarantee that the
 * state is (still) non-DEAD.
 */
bool
uf_is_dead (const uf_t *uf, ref_t state)
{
    const ref_t rep = uf_find (uf, state);
    const bool dead = ( atomic_read (&uf->array[rep].uf_status) == UF_DEAD );

    return dead;
}
/**
 * unites two sets and ensures that their cyclic lists are combined to one list
 *
 * Steps:
 *  1. find both representatives; the one with the highest index becomes the
 *     new root r, the other one (q) is locked with uf_lock_uf
 *  2. lock one list entry for a and one for b
 *  3. swap the list_next pointers of the two locked entries, which joins
 *     the two cyclic lists
 *  4. make r the parent of q and propagate the worker set of q to r (and to
 *     any newer representative of r)
 *  5. release all locks
 *
 * The lock on q is released on every path that returns after it has been
 * acquired, including the paths on which locking a list entry fails.
 *
 * returns 1 if the sets have been united by this call, 0 if a and b were
 * already in the same set or a list entry could not be locked
 */
bool
uf_union (const uf_t *uf, ref_t a, ref_t b)
{
    ref_t a_r, b_r, a_l, b_l, a_n, b_n, r, q;
    sz_w q_w, r_w;

    while ( 1 ) {
        a_r = uf_find (uf, a);
        b_r = uf_find (uf, b);

        // find the representatives
        if (a_r == b_r) {
            return 0;
        }

        // decide on the new root (deterministically)
        // take the highest index as root
        r = a_r;
        q = b_r;
        if (a_r < b_r) {
            r = b_r;
            q = a_r;
        }

        // lock the non-root
        if ( !uf_lock_uf (uf, q) )
            continue;

        break;
    }

    // lock the list entries
    if ( !uf_lock_list (uf, a, &a_l) ) {
        // HREassert ( uf_is_dead(uf, a) && uf_sameset(uf, a, b) );
        // q was locked above and must not stay locked
        uf_unlock_uf (uf, q);
        return 0;
    }
    if ( !uf_lock_list (uf, b, &b_l) ) {
        // HREassert ( uf_is_dead(uf, b) && uf_sameset(uf, a, b) );
        uf_unlock_list (uf, a_l);
        // q was locked above and must not stay locked
        uf_unlock_uf (uf, q);
        return 0;
    }

    // swap the list entries
    a_n = atomic_read (&uf->array[a_l].list_next);
    b_n = atomic_read (&uf->array[b_l].list_next);

    if (a_n == 0) // singleton
        a_n = a_l;

    if (b_n == 0) // singleton
        b_n = b_l;

    atomic_write (&uf->array[a_l].list_next, b_n);
    atomic_write (&uf->array[b_l].list_next, a_n);

    // update parent
    atomic_write (&uf->array[q].parent, r);

    // only update worker set for r if q adds workers
    q_w = atomic_read (&uf->array[q].p_set);
    r_w = atomic_read (&uf->array[r].p_set);
    if ( (q_w | r_w) != r_w) {
        // update!
        fetch_or (&uf->array[r].p_set, q_w);
        // r may have received a parent in the meantime: make sure the
        // workers also end up at the current representative
        while (atomic_read (&uf->array[r].parent) != 0) {
            r = uf_find (uf, r);
            fetch_or (&uf->array[r].p_set, q_w);
        }
    }

    // unlock
    uf_unlock_list (uf, a_l);
    uf_unlock_list (uf, b_l);
    uf_unlock_uf (uf, q);

    return 1;
}
/** Function make_cluster() * * Given a list of fragments denoted by seeds, make pairwise comparison * and clustering conforming max_mismatch criteria * * Output: clusters in 2d vector format, where each row of the vector * stores the clustered fragment IDs. */ void make_cluster (iivec_t& clusters, const ii64vec_t& list_seeds, const ivec_t& init_cluster, int max_mismatch, const ivec_t& uf_clst) { if (list_seeds.size() == 0) { abording ("DuplRm.cpp -- make_cluster(): SC failed"); } //--------- union find: (1) initialize the cluster --------- int sz = init_cluster.size(); bvec_t visited (sz, false); ivec_t clst (sz); for (int i = 0; i < sz; ++ i) clst[i] = i; //--------- pairwise comparison --------- for (int i = 0; i < sz - 1; ++ i) { if (visited[i]) continue; // to speed up int idx_i = init_cluster[i]; for (int j = i + 1; j < sz; ++ j) { if (visited[j]) continue; // to speed up, avoid of comparison // if this is already clustered int idx_j = init_cluster[j]; // check global uf structure according to fragID int root_uf_i = uf_clsfind ((int) list_seeds[idx_i].back(), uf_clst), root_uf_j = uf_clsfind ((int) list_seeds[idx_j].back(), uf_clst); if (root_uf_i != root_uf_j) { int root_i = uf_find (i, clst), root_j = uf_find (j, clst); if (root_i != root_j) { if (is_similar (list_seeds[idx_i], list_seeds[idx_j], max_mismatch)) { clst[root_j] = root_i; visited[j] = true; } } } // if } // for (int j = i + 1 } // for (int i = 0 //----- generate final cluster { clusterID --> fragment IDs } ------ std::map<int, ivec_t> clstID_fragIDs; std::map<int, ivec_t>::iterator it; for (int i = 0; i < sz; ++ i) { int idx_i = init_cluster[i]; int fragID = list_seeds[idx_i].back(); int root_i = uf_clsfind (i, clst); it = clstID_fragIDs.find (root_i); if (it != clstID_fragIDs.end()) it->second.push_back(fragID); else clstID_fragIDs[root_i] = ivec_t (1, fragID); } // for (int i = 0 // go through the map and produce clusters in sorted vector format for (it = clstID_fragIDs.begin(); it != 
clstID_fragIDs.end(); ++ it) { if (it->second.size() > 1) { std::sort (it->second.begin(), it->second.end()); clusters.push_back(it->second); } } // for (it } // make_cluster
/* Return the size of the set in the union-find structure that contains p. */
int uf_set_size(uf_t *t,int p){
    /* The id stored at the root is the negated size of its set. */
    const int size = -t->id[uf_find(t, p)];
    return size;
}
// Tells whether `a` and `b` belong to the same set, i.e. whether uf_find
// yields the same root for both.
bool uf_connected(int a, int b, uf &in) {
    // Look up `a` first, then `b`, each in its own statement.
    const int root_of_a = uf_find(a, in);
    const int root_of_b = uf_find(b, in);
    if (root_of_a != root_of_b)
        return false;
    return true;
}