/** * \ingroup structural * \function igraph_similarity_jaccard * \brief Jaccard similarity coefficient for the given vertices. * * </para><para> * The Jaccard similarity coefficient of two vertices is the number of common * neighbors divided by the number of vertices that are neighbors of at * least one of the two vertices being considered. This function calculates * the pairwise Jaccard similarities for some (or all) of the vertices. * * \param graph The graph object to analyze * \param res Pointer to a matrix, the result of the calculation will * be stored here. The number of its rows and columns is the same * as the number of vertex ids in \p vids. * \param vids The vertex ids of the vertices for which the * calculation will be done. * \param mode The type of neighbors to be used for the calculation in * directed graphs. Possible values: * \clist * \cli IGRAPH_OUT * the outgoing edges will be considered for each node. * \cli IGRAPH_IN * the incoming edges will be considered for each node. * \cli IGRAPH_ALL * the directed graph is considered as an undirected one for the * computation. * \endclist * \param loops Whether to include the vertices themselves in the neighbor * sets. * \return Error code: * \clist * \cli IGRAPH_ENOMEM * not enough memory for temporary data. * \cli IGRAPH_EINVVID * invalid vertex id passed. * \cli IGRAPH_EINVMODE * invalid mode argument. * \endclist * * Time complexity: O(|V|^2 d), * |V| is the number of vertices in the vertex iterator given, d is the * (maximum) degree of the vertices in the graph. * * \sa \ref igraph_similarity_dice(), a measure very similar to the Jaccard * coefficient * * \example examples/simple/igraph_similarity.c */ int igraph_similarity_jaccard(const igraph_t *graph, igraph_matrix_t *res, const igraph_vs_t vids, igraph_neimode_t mode, igraph_bool_t loops) { igraph_lazy_adjlist_t al; igraph_vit_t vit, vit2; long int i, j, k; long int len_union, len_intersection; igraph_vector_t *v1, *v2; IGRAPH_CHECK(igraph_vit_create(graph, vids, &vit)); IGRAPH_FINALLY(igraph_vit_destroy, &vit); IGRAPH_CHECK(igraph_vit_create(graph, vids, &vit2)); IGRAPH_FINALLY(igraph_vit_destroy, &vit2); IGRAPH_CHECK(igraph_lazy_adjlist_init(graph, &al, mode, IGRAPH_SIMPLIFY)); IGRAPH_FINALLY(igraph_lazy_adjlist_destroy, &al); IGRAPH_CHECK(igraph_matrix_resize(res, IGRAPH_VIT_SIZE(vit), IGRAPH_VIT_SIZE(vit))); if (loops) { for (IGRAPH_VIT_RESET(vit); !IGRAPH_VIT_END(vit); IGRAPH_VIT_NEXT(vit)) { i=IGRAPH_VIT_GET(vit); v1=igraph_lazy_adjlist_get(&al, (igraph_integer_t) i); if (!igraph_vector_binsearch(v1, i, &k)) igraph_vector_insert(v1, k, i); } } for (IGRAPH_VIT_RESET(vit), i=0; !IGRAPH_VIT_END(vit); IGRAPH_VIT_NEXT(vit), i++) { MATRIX(*res, i, i) = 1.0; for (IGRAPH_VIT_RESET(vit2), j=0; !IGRAPH_VIT_END(vit2); IGRAPH_VIT_NEXT(vit2), j++) { if (j <= i) continue; v1=igraph_lazy_adjlist_get(&al, IGRAPH_VIT_GET(vit)); v2=igraph_lazy_adjlist_get(&al, IGRAPH_VIT_GET(vit2)); igraph_i_neisets_intersect(v1, v2, &len_union, &len_intersection); if (len_union > 0) MATRIX(*res, i, j) = ((igraph_real_t)len_intersection)/len_union; else MATRIX(*res, i, j) = 0.0; MATRIX(*res, j, i) = MATRIX(*res, i, j); } } igraph_lazy_adjlist_destroy(&al); igraph_vit_destroy(&vit); igraph_vit_destroy(&vit2); IGRAPH_FINALLY_CLEAN(3); return 0; }
int igraph_i_maximal_cliques(const igraph_t *graph, igraph_i_maximal_clique_func_t func, void* data) { int directed=igraph_is_directed(graph); long int i, j, k, l; igraph_integer_t no_of_nodes, nodes_to_check, nodes_done; igraph_integer_t best_cand = 0, best_cand_degree = 0, best_fini_cand_degree; igraph_adjlist_t adj_list; igraph_stack_ptr_t stack; igraph_i_maximal_cliques_stack_frame frame, *new_frame_ptr; igraph_vector_t clique, new_cand, new_fini, cn, best_cand_nbrs, best_fini_cand_nbrs; igraph_bool_t cont = 1; int assret; if (directed) IGRAPH_WARNING("directionality of edges is ignored for directed graphs"); no_of_nodes = igraph_vcount(graph); if (no_of_nodes == 0) return IGRAPH_SUCCESS; /* Construct an adjacency list representation */ IGRAPH_CHECK(igraph_adjlist_init(graph, &adj_list, IGRAPH_ALL)); IGRAPH_FINALLY(igraph_adjlist_destroy, &adj_list); IGRAPH_CHECK(igraph_adjlist_simplify(&adj_list)); igraph_adjlist_sort(&adj_list); /* Initialize stack */ IGRAPH_CHECK(igraph_stack_ptr_init(&stack, 0)); IGRAPH_FINALLY(igraph_i_maximal_cliques_stack_destroy, &stack); /* Create the initial (empty) clique */ IGRAPH_VECTOR_INIT_FINALLY(&clique, 0); /* Initialize new_cand, new_fini, cn, best_cand_nbrs and best_fini_cand_nbrs (will be used later) */ IGRAPH_VECTOR_INIT_FINALLY(&new_cand, 0); IGRAPH_VECTOR_INIT_FINALLY(&new_fini, 0); IGRAPH_VECTOR_INIT_FINALLY(&cn, 0); IGRAPH_VECTOR_INIT_FINALLY(&best_cand_nbrs, 0); IGRAPH_VECTOR_INIT_FINALLY(&best_fini_cand_nbrs, 0); /* Find the vertex with the highest degree */ best_cand = 0; best_cand_degree = igraph_vector_size(igraph_adjlist_get(&adj_list, 0)); for (i = 1; i < no_of_nodes; i++) { j = igraph_vector_size(igraph_adjlist_get(&adj_list, i)); if (j > best_cand_degree) { best_cand = i; best_cand_degree = j; } } /* Create the initial stack frame */ IGRAPH_CHECK(igraph_vector_init_seq(&frame.cand, 0, no_of_nodes-1)); IGRAPH_FINALLY(igraph_vector_destroy, &frame.cand); IGRAPH_CHECK(igraph_vector_init(&frame.fini, 0)); IGRAPH_FINALLY(igraph_vector_destroy, &frame.fini); IGRAPH_CHECK(igraph_vector_init(&frame.cand_filtered, 0)); IGRAPH_FINALLY(igraph_vector_destroy, &frame.cand_filtered); IGRAPH_CHECK(igraph_vector_difference_sorted(&frame.cand, igraph_adjlist_get(&adj_list, best_cand), &frame.cand_filtered)); IGRAPH_FINALLY_CLEAN(3); IGRAPH_FINALLY(igraph_i_maximal_cliques_stack_frame_destroy, &frame); /* TODO: frame.cand and frame.fini should be a set instead of a vector */ /* Main loop starts here */ nodes_to_check = igraph_vector_size(&frame.cand_filtered); nodes_done = 0; while (!igraph_vector_empty(&frame.cand_filtered) || !igraph_stack_ptr_empty(&stack)) { if (igraph_vector_empty(&frame.cand_filtered)) { /* No candidates left to check in this stack frame, pop out the previous stack frame */ igraph_i_maximal_cliques_stack_frame *newframe = igraph_stack_ptr_pop(&stack); igraph_i_maximal_cliques_stack_frame_destroy(&frame); frame = *newframe; free(newframe); if (igraph_stack_ptr_size(&stack) == 1) { /* We will be using the next candidate node in the next iteration, so we can increase * nodes_done by 1 */ nodes_done++; } /* For efficiency reasons, we only check for interruption and show progress here */ IGRAPH_PROGRESS("Maximal cliques: ", 100.0 * nodes_done / nodes_to_check, NULL); IGRAPH_ALLOW_INTERRUPTION(); igraph_vector_pop_back(&clique); continue; } /* Try the next node in the clique */ i = igraph_vector_pop_back(&frame.cand_filtered); IGRAPH_CHECK(igraph_vector_push_back(&clique, i)); /* Remove the node from the candidate list */ assret=igraph_vector_binsearch(&frame.cand, i, &j); assert(assret); igraph_vector_remove(&frame.cand, j); /* Add the node to the finished list */ assret = !igraph_vector_binsearch(&frame.fini, i, &j); assert(assret); IGRAPH_CHECK(igraph_vector_insert(&frame.fini, j, i)); /* Create new_cand and new_fini */ IGRAPH_CHECK(igraph_vector_intersect_sorted(&frame.cand, igraph_adjlist_get(&adj_list, i), &new_cand)); IGRAPH_CHECK(igraph_vector_intersect_sorted(&frame.fini, igraph_adjlist_get(&adj_list, i), &new_fini)); /* Do we have anything more to search? */ if (igraph_vector_empty(&new_cand)) { if (igraph_vector_empty(&new_fini)) { /* We have a maximal clique here */ IGRAPH_CHECK(func(&clique, data, &cont)); if (!cont) { /* The callback function requested to stop the search */ break; } } igraph_vector_pop_back(&clique); continue; } if (igraph_vector_empty(&new_fini) && igraph_vector_size(&new_cand) == 1) { /* Shortcut: only one node left */ IGRAPH_CHECK(igraph_vector_push_back(&clique, VECTOR(new_cand)[0])); IGRAPH_CHECK(func(&clique, data, &cont)); if (!cont) { /* The callback function requested to stop the search */ break; } igraph_vector_pop_back(&clique); igraph_vector_pop_back(&clique); continue; } /* Find the next best candidate node in new_fini */ l = igraph_vector_size(&new_cand); best_cand_degree = -1; j = igraph_vector_size(&new_fini); for (i = 0; i < j; i++) { k = (long int)VECTOR(new_fini)[i]; IGRAPH_CHECK(igraph_vector_intersect_sorted(&new_cand, igraph_adjlist_get(&adj_list, k), &cn)); if (igraph_vector_size(&cn) > best_cand_degree) { best_cand_degree = igraph_vector_size(&cn); IGRAPH_CHECK(igraph_vector_update(&best_fini_cand_nbrs, &cn)); if (best_cand_degree == l) { /* Cool, we surely have the best candidate node here as best_cand_degree can't get any better */ break; } } } /* Shortcut here: we don't have to examine new_cand */ if (best_cand_degree == l) { igraph_vector_pop_back(&clique); continue; } /* Still finding best candidate node */ best_fini_cand_degree = best_cand_degree; best_cand_degree = -1; j = igraph_vector_size(&new_cand); l = l - 1; for (i = 0; i < j; i++) { k = (long int)VECTOR(new_cand)[i]; IGRAPH_CHECK(igraph_vector_intersect_sorted(&new_cand, igraph_adjlist_get(&adj_list, k), &cn)); if (igraph_vector_size(&cn) > best_cand_degree) { best_cand_degree = igraph_vector_size(&cn); IGRAPH_CHECK(igraph_vector_update(&best_cand_nbrs, &cn)); if (best_cand_degree == l) { /* Cool, we surely have the best candidate node here as best_cand_degree can't get any better */ break; } } } /* Create a new stack frame in case we back out later */ new_frame_ptr = igraph_Calloc(1, igraph_i_maximal_cliques_stack_frame); if (new_frame_ptr == 0) { IGRAPH_ERROR("cannot allocate new stack frame", IGRAPH_ENOMEM); } IGRAPH_FINALLY(igraph_free, new_frame_ptr); *new_frame_ptr = frame; memset(&frame, 0, sizeof(frame)); IGRAPH_CHECK(igraph_stack_ptr_push(&stack, new_frame_ptr)); IGRAPH_FINALLY_CLEAN(1); /* ownership of new_frame_ptr taken by the stack */ /* Ownership of the current frame and its vectors (frame.cand, frame.done, frame.cand_filtered) * is taken by the stack from now on. Vectors in frame must be re-initialized with new_cand, * new_fini and stuff. The old frame.cand and frame.fini won't be leaked because they are * managed by the stack now. */ frame.cand = new_cand; frame.fini = new_fini; IGRAPH_CHECK(igraph_vector_init(&new_cand, 0)); IGRAPH_CHECK(igraph_vector_init(&new_fini, 0)); IGRAPH_CHECK(igraph_vector_init(&frame.cand_filtered, 0)); /* Adjust frame.cand_filtered */ if (best_cand_degree < best_fini_cand_degree) { IGRAPH_CHECK(igraph_vector_difference_sorted(&frame.cand, &best_fini_cand_nbrs, &frame.cand_filtered)); } else { IGRAPH_CHECK(igraph_vector_difference_sorted(&frame.cand, &best_cand_nbrs, &frame.cand_filtered)); } } IGRAPH_PROGRESS("Maximal cliques: ", 100.0, NULL); igraph_adjlist_destroy(&adj_list); igraph_vector_destroy(&clique); igraph_vector_destroy(&new_cand); igraph_vector_destroy(&new_fini); igraph_vector_destroy(&cn); igraph_vector_destroy(&best_cand_nbrs); igraph_vector_destroy(&best_fini_cand_nbrs); igraph_i_maximal_cliques_stack_frame_destroy(&frame); igraph_i_maximal_cliques_stack_destroy(&stack); IGRAPH_FINALLY_CLEAN(9); return IGRAPH_SUCCESS; }
igraph_vector_t * ggen_analyze_longest_antichain(igraph_t *g) { /* The following steps are implemented : * - Convert our DAG to a specific bipartite graph B * - solve maximum matching on B * - conver maximum matching to min vectex cover * - convert min vertex cover to antichain on G */ int err; unsigned long i,vg,found,added; igraph_t b,gstar; igraph_vector_t edges,*res = NULL; igraph_vector_t c,s,t,todo,n,next,l,r; igraph_eit_t eit; igraph_es_t es; igraph_integer_t from,to; igraph_vit_t vit; igraph_vs_t vs; igraph_real_t value; if(g == NULL) return NULL; /* before creating the bipartite graph, we need all relations * between any two vertices : the transitive closure of g */ err = igraph_copy(&gstar,g); if(err) return NULL; err = ggen_transform_transitive_closure(&gstar); if(err) goto error; /* Bipartite convertion : let G = (S,C), * we build B = (U,V,E) with * - U = V = S (each vertex is present twice) * - (u,v) \in E iff : * - u \in U * - v \in V * - u < v in C (warning, this means that we take * transitive closure into account, not just the * original edges) * We will also need two additional nodes further in the code. */ vg = igraph_vcount(g); err = igraph_empty(&b,vg*2,1); if(err) goto error; /* id and id+vg will be a vertex in U and its copy in V, * iterate over gstar edges to create edges in b */ err = igraph_vector_init(&edges,igraph_ecount(&gstar)); if(err) goto d_b; igraph_vector_clear(&edges); err = igraph_eit_create(&gstar,igraph_ess_all(IGRAPH_EDGEORDER_ID),&eit); if(err) goto d_edges; for(IGRAPH_EIT_RESET(eit); !IGRAPH_EIT_END(eit); IGRAPH_EIT_NEXT(eit)) { err = igraph_edge(&gstar,IGRAPH_EIT_GET(eit),&from,&to); if(err) { igraph_eit_destroy(&eit); goto d_edges; } to += vg; igraph_vector_push_back(&edges,(igraph_real_t)from); igraph_vector_push_back(&edges,(igraph_real_t)to); } igraph_eit_destroy(&eit); err = igraph_add_edges(&b,&edges,NULL); if(err) goto d_edges; /* maximum matching on b */ igraph_vector_clear(&edges); err = bipartite_maximum_matching(&b,&edges); if(err) goto d_edges; /* Let M be the max matching, and N be E - M * Define T as all unmatched vectices from U as well as all vertices * reachable from those by going left-to-right along N and right-to-left along * M. * Define L = U - T, R = V \inter T * C:= L + R * C is a minimum vertex cover */ err = igraph_vector_init_seq(&n,0,igraph_ecount(&b)-1); if(err) goto d_edges; err = vector_diff(&n,&edges); if(err) goto d_n; err = igraph_vector_init(&c,vg); if(err) goto d_n; igraph_vector_clear(&c); /* matched vertices : S */ err = igraph_vector_init(&s,vg); if(err) goto d_c; igraph_vector_clear(&s); for(i = 0; i < igraph_vector_size(&edges); i++) { err = igraph_edge(&b,VECTOR(edges)[i],&from,&to); if(err) goto d_s; igraph_vector_push_back(&s,from); } /* we may have inserted the same vertex multiple times */ err = vector_uniq(&s); if(err) goto d_s; /* unmatched */ err = igraph_vector_init_seq(&t,0,vg-1); if(err) goto d_s; err = vector_diff(&t,&s); if(err) goto d_t; /* alternating paths */ err = igraph_vector_copy(&todo,&t); if(err) goto d_t; err = igraph_vector_init(&next,vg); if(err) goto d_todo; igraph_vector_clear(&next); do { vector_uniq(&todo); added = 0; for(i = 0; i < igraph_vector_size(&todo); i++) { if(VECTOR(todo)[i] < vg) { /* scan edges */ err = igraph_es_adj(&es,VECTOR(todo)[i],IGRAPH_OUT); if(err) goto d_next; err = igraph_eit_create(&b,es,&eit); if(err) { igraph_es_destroy(&es); goto d_next; } for(IGRAPH_EIT_RESET(eit); !IGRAPH_EIT_END(eit); IGRAPH_EIT_NEXT(eit)) { if(igraph_vector_binsearch(&n,IGRAPH_EIT_GET(eit),NULL)) { err = igraph_edge(&b,IGRAPH_EIT_GET(eit),&from,&to); if(err) { igraph_eit_destroy(&eit); igraph_es_destroy(&es); goto d_next; } if(!igraph_vector_binsearch(&t,to,NULL)) { igraph_vector_push_back(&next,to); added = 1; } } } } else { /* scan edges */ err = igraph_es_adj(&es,VECTOR(todo)[i],IGRAPH_IN); if(err) goto d_next; err = igraph_eit_create(&b,es,&eit); if(err) { igraph_es_destroy(&es); goto d_next; } for(IGRAPH_EIT_RESET(eit); !IGRAPH_EIT_END(eit); IGRAPH_EIT_NEXT(eit)) { if(igraph_vector_binsearch(&edges,IGRAPH_EIT_GET(eit),NULL)) { err = igraph_edge(&b,IGRAPH_EIT_GET(eit),&from,&to); if(err) { igraph_eit_destroy(&eit); igraph_es_destroy(&es); goto d_next; } if(!igraph_vector_binsearch(&t,to,NULL)) { igraph_vector_push_back(&next,from); added = 1; } } } } igraph_es_destroy(&es); igraph_eit_destroy(&eit); } igraph_vector_append(&t,&todo); igraph_vector_clear(&todo); igraph_vector_append(&todo,&next); igraph_vector_clear(&next); } while(added); err = igraph_vector_init_seq(&l,0,vg-1); if(err) goto d_t; err = vector_diff(&l,&t); if(err) goto d_l; err = igraph_vector_update(&c,&l); if(err) goto d_l; err = igraph_vector_init(&r,vg); if(err) goto d_l; igraph_vector_clear(&r); /* compute V \inter T */ for(i = 0; i < igraph_vector_size(&t); i++) { if(VECTOR(t)[i] >= vg) igraph_vector_push_back(&r,VECTOR(t)[i]); } igraph_vector_add_constant(&r,(igraph_real_t)-vg); err = vector_union(&c,&r); if(err) goto d_r; /* our antichain is U - C */ res = malloc(sizeof(igraph_vector_t)); if(res == NULL) goto d_r; err = igraph_vector_init_seq(res,0,vg-1); if(err) goto f_res; err = vector_diff(res,&c); if(err) goto d_res; goto ret; d_res: igraph_vector_destroy(res); f_res: free(res); res = NULL; ret: d_r: igraph_vector_destroy(&r); d_l: igraph_vector_destroy(&l); d_next: igraph_vector_destroy(&next); d_todo: igraph_vector_destroy(&todo); d_t: igraph_vector_destroy(&t); d_s: igraph_vector_destroy(&s); d_c: igraph_vector_destroy(&c); d_n: igraph_vector_destroy(&n); d_edges: igraph_vector_destroy(&edges); d_b: igraph_destroy(&b); error: igraph_destroy(&gstar); return res; }
/** * \ingroup structural * \function igraph_similarity_jaccard_pairs * \brief Jaccard similarity coefficient for given vertex pairs. * * </para><para> * The Jaccard similarity coefficient of two vertices is the number of common * neighbors divided by the number of vertices that are neighbors of at * least one of the two vertices being considered. This function calculates * the pairwise Jaccard similarities for a list of vertex pairs. * * \param graph The graph object to analyze * \param res Pointer to a vector, the result of the calculation will * be stored here. The number of elements is the same as the number * of pairs in \p pairs. * \param pairs A vector that contains the pairs for which the similarity * will be calculated. Each pair is defined by two consecutive elements, * i.e. the first and second element of the vector specifies the first * pair, the third and fourth element specifies the second pair and so on. * \param mode The type of neighbors to be used for the calculation in * directed graphs. Possible values: * \clist * \cli IGRAPH_OUT * the outgoing edges will be considered for each node. * \cli IGRAPH_IN * the incoming edges will be considered for each node. * \cli IGRAPH_ALL * the directed graph is considered as an undirected one for the * computation. * \endclist * \param loops Whether to include the vertices themselves in the neighbor * sets. * \return Error code: * \clist * \cli IGRAPH_ENOMEM * not enough memory for temporary data. * \cli IGRAPH_EINVVID * invalid vertex id passed. * \cli IGRAPH_EINVMODE * invalid mode argument. * \endclist * * Time complexity: O(nd), n is the number of pairs in the given vector, d is * the (maximum) degree of the vertices in the graph. * * \sa \ref igraph_similarity_jaccard() to calculate the Jaccard similarity * between all pairs of a vertex set, or \ref igraph_similarity_dice() and * \ref igraph_similarity_dice_pairs() for a measure very similar to the * Jaccard coefficient * * \example examples/simple/igraph_similarity.c */ int igraph_similarity_jaccard_pairs(const igraph_t *graph, igraph_vector_t *res, const igraph_vector_t *pairs, igraph_neimode_t mode, igraph_bool_t loops) { igraph_lazy_adjlist_t al; long int i, j, k, u, v; long int len_union, len_intersection; igraph_vector_t *v1, *v2; igraph_bool_t *seen; k = igraph_vector_size(pairs); if (k % 2 != 0) IGRAPH_ERROR("number of elements in `pairs' must be even", IGRAPH_EINVAL); IGRAPH_CHECK(igraph_vector_resize(res, k/2)); IGRAPH_CHECK(igraph_lazy_adjlist_init(graph, &al, mode, IGRAPH_SIMPLIFY)); IGRAPH_FINALLY(igraph_lazy_adjlist_destroy, &al); if (loops) { /* Add the loop edges */ i = igraph_vcount(graph); seen = igraph_Calloc(i, igraph_bool_t); if (seen == 0) IGRAPH_ERROR("cannot calculate Jaccard similarity", IGRAPH_ENOMEM); IGRAPH_FINALLY(free, seen); for (i = 0; i < k; i++) { j = (long int) VECTOR(*pairs)[i]; if (seen[j]) continue; seen[j] = 1; v1=igraph_lazy_adjlist_get(&al, (igraph_integer_t) j); if (!igraph_vector_binsearch(v1, j, &u)) igraph_vector_insert(v1, u, j); } free(seen); IGRAPH_FINALLY_CLEAN(1); } for (i = 0, j = 0; i < k; i += 2, j++) { u = (long int) VECTOR(*pairs)[i]; v = (long int) VECTOR(*pairs)[i+1]; if (u == v) { VECTOR(*res)[j] = 1.0; continue; } v1 = igraph_lazy_adjlist_get(&al, (igraph_integer_t) u); v2 = igraph_lazy_adjlist_get(&al, (igraph_integer_t) v); igraph_i_neisets_intersect(v1, v2, &len_union, &len_intersection); if (len_union > 0) VECTOR(*res)[j] = ((igraph_real_t)len_intersection) / len_union; else VECTOR(*res)[j] = 0.0; } igraph_lazy_adjlist_destroy(&al); IGRAPH_FINALLY_CLEAN(1); return 0; }