/*
 * Write the neighborhood recall of an embedding to the text file `outfile`.
 *
 * For k = 5, 10, ..., 50 and for every node i, let m = MIN(n-1, k). The m
 * nodes ranked 1..m by row i of ideal_dist_matrix are compared against the
 * radius given by the entry ranked m in row i of dist_matrix: a node counts
 * as a true positive when its dist_matrix distance to i does not exceed that
 * radius. The fraction true_positive/m is averaged over all nodes and written
 * as one "k recall" line per k.
 *
 * outfile:            path of the file to create/truncate.
 * A0:                 only A0->m (used as the node count n) is read.
 * ideal_dist_matrix:  n*n values; row i starts at offset i*n (graph space).
 * dist_matrix:        n*n values; row i starts at offset i*n (embedding).
 *
 * Rank 0 of each ordering is skipped; the original comments describe it as
 * the node itself.
 * NOTE(review): for n < 2 the averages are 0/0, as in the previous version.
 *
 * If the file cannot be opened a message is printed on stderr and nothing
 * else is done.
 */
static void get_neighborhood_precision_recall(char *outfile, SparseMatrix A0, real *ideal_dist_matrix, real *dist_matrix){
  enum { K_MIN = 5, K_MAX = 50, K_STEP = 5, K_COUNT = (K_MAX - K_MIN)/K_STEP + 1 };
  int i, j, ik, n = A0->m;
  int *g_order = NULL, *p_order = NULL;/* ordering using graph/physical distance */
  real *gdist, *pdist, radius;
  int n_neighbors;/* number of closest neighbors considered in both spaces */
  real node_dist;/* distance of a node to the center node */
  real true_positive;
  real recall[K_COUNT];
  FILE *fp;

  fp = fopen(outfile,"w");
  if (!fp){
    fprintf(stderr,"cannot open file %s for writing\n", outfile);
    return;
  }

  for (ik = 0; ik < K_COUNT; ik++) recall[ik] = 0;

  /* The orderings of row i do not depend on k, so compute them once per node
     and reuse them for every k. Each recall[ik] still accumulates over
     i = 0..n-1 in the same order, so the sums are unchanged. */
  for (i = 0; i < n; i++){
    gdist = &(ideal_dist_matrix[(size_t)i*n]);
    vector_ordering(n, gdist, &g_order, TRUE);
    pdist = &(dist_matrix[(size_t)i*n]);
    vector_ordering(n, pdist, &p_order, TRUE);

    for (ik = 0; ik < K_COUNT; ik++){
      /* number of closest neighbors to consider, excluding the node itself;
         the same count is used in graph space and in the embedding */
      n_neighbors = MIN(n-1, K_MIN + ik*K_STEP);
      radius = pdist[p_order[n_neighbors]];
      true_positive = 0;
      for (j = 1; j <= n_neighbors; j++){
        node_dist = pdist[g_order[j]];/* the physical distance for j-th closest node (in graph space) */
        if (node_dist <= radius) true_positive++;
      }
      recall[ik] += true_positive/n_neighbors;
    }
  }

  for (ik = 0; ik < K_COUNT; ik++){
    fprintf(fp,"%d %f\n", K_MIN + ik*K_STEP, recall[ik]/n);
  }
  fprintf(stderr,"wrote precision/recall in file %s\n", outfile);
  fclose(fp);
  FREE(g_order);
  FREE(p_order);
}
real vector_median(int n, real *x){
  /* Median of the n reals in x, ranked through the index permutation that
     vector_ordering produces. For an even n the two middle-ranked values are
     averaged; for an odd n the single middle-ranked value is returned.
     n must be at least 1: with n == 0 the middle ranks do not exist. */
  int *rank = NULL;
  int mid = n/2;
  real median;

  vector_ordering(n, x, &rank, TRUE);

  median = x[rank[mid]];
  if (n % 2 == 0){
    /* even count: average the lower-middle and upper-middle entries */
    median = 0.5*(x[rank[mid-1]] + median);
  }

  FREE(rank);
  return median;
}
real vector_percentile(int n, real *x, real y){
  /* Find the value such that a fraction y of the elements of vector x is <=
     that value.
     n: number of entries in x; must be at least 1.
     x: the values; ranked through the permutation from vector_ordering.
     y: a fraction between 0 and 1; values outside that range are clamped.
     Returns the entry of x at rank (int)(n*y), limited to the last rank. */
  int *p = NULL, i;
  real res;

  vector_ordering(n, x, &p, TRUE);

  y = MIN(y, 1);
  y = MAX(0, y);

  i = n*y;
  /* y == 1 gives i == n, one past the last valid rank; use the largest
     element in that case instead of reading beyond the permutation. */
  if (i > n - 1) i = n - 1;

  res = x[p[i]];
  FREE(p);
  return res;
}
/*
 * Write binned statistics of layout distance versus graph distance to the
 * text file `outfile`.
 *
 * For every node pair i < j the distance-matrix entry d_ij (from
 * SparseMatrix_distance_matrix) and the layout distance x_ij (from
 * distance(x, dim, i, j)) are collected. The layout distances are multiplied
 * by t = sum(w*d*x) / sum(w*x*x), with w = 1/d^2 (w = 1 when d is not
 * positive), or by 1 when the denominator is not positive; t is reported on
 * stderr. The pairs are then walked in the order given by vector_ordering on
 * d and split into at most 30 equal-width bins over [dmin, dmax]. For every
 * non-empty bin one line is written:
 *   count bin_start bin_end xmin x25 median x75 xmax
 *
 * outfile: path of the file to create/truncate.
 * A:       the graph; A->m is used as the node count.
 * dim:     passed through to distance().
 * x:       coordinates, passed through to distance().
 *
 * If the file cannot be opened a message is printed on stderr and nothing
 * else is done. With fewer than two nodes there are no pairs and the file is
 * left empty.
 */
void dump_distance_edge_length(char *outfile, SparseMatrix A, int dim, real *x){
  int weighted = TRUE;
  int n, i, j;
  real *dist_matrix = NULL;
  int flag;
  real *dij, *xij, wij, top = 0, bot = 0, t;
  int *p = NULL;
  real dmin, dmax, xmax, xmin, bsta, bwidth, *xbin, x25, x75, median;
  int nbins = 30, nsta, nz = 0, nbin;
  size_t npairs;
  FILE *fp;

  fp = fopen(outfile,"w");
  if (!fp){
    fprintf(stderr,"cannot open file %s for writing\n", outfile);
    return;
  }

  flag = SparseMatrix_distance_matrix(A, weighted, &dist_matrix);
  assert(!flag);
  (void)flag;/* only read by the assert above */

  n = A->m;
  /* number of unordered pairs, computed in size_t so it cannot overflow int */
  npairs = (n > 1) ? (size_t)n*(size_t)(n-1)/2 : 0;
  dij = MALLOC(sizeof(real)*npairs);
  xij = MALLOC(sizeof(real)*npairs);
  for (i = 0; i < n; i++){
    for (j = i+1; j < n; j++){
      dij[nz] = dist_matrix[i*n+j];
      xij[nz] = distance(x, dim, i, j);
      if (dij[nz] > 0){
        wij = 1/(dij[nz]*dij[nz]);
      } else {
        wij = 1;
      }
      top += wij * dij[nz] * xij[nz];
      bot += wij*xij[nz]*xij[nz];
      nz++;
    }
  }
  if (bot > 0){
    t = top/bot;
  } else {
    t = 1;
  }
  fprintf(stderr,"scaling factor = %f\n", t);

  for (i = 0; i < nz; i++) xij[i] *= t;

  /* Without any pair there is no dmin/dmax to read and nothing to bin. */
  if (nz > 0){
    vector_ordering(nz, dij, &p, TRUE);
    dmin = dij[p[0]];
    dmax = dij[p[nz-1]];
    nbins = MIN(nbins, dmax/MAX(1,dmin));/* scale by dmin since edge length of 1 is treated as 72 points in stress/maxent/full_stress */
    bwidth = (dmax - dmin)/nbins;

    nsta = 0; bsta = dmin;
    xbin = MALLOC(sizeof(real)*nz);

    for (i = 0; i < nbins; i++){
      /* the bin is [dmin + i*(dmax-dmin)/nbins, dmin + (i+1)*(dmax-dmin)/nbins] */
      nbin = 0;
      /* only peek at the next pair when one is left */
      xmin = xmax = (nsta < nz) ? xij[p[nsta]] : 0;
      while (nsta < nz && dij[p[nsta]] >= bsta && dij[p[nsta]] <= bsta + bwidth){
        xbin[nbin++] = xij[p[nsta]];
        xmin = MIN(xmin, xij[p[nsta]]);
        xmax = MAX(xmax, xij[p[nsta]]);
        nsta++;
      }
      /* find the median, and 25/75%; empty bins produce no output line */
      if (nbin > 0){
        median = vector_median(nbin, xbin);
        x25 = vector_percentile(nbin, xbin, 0.25);
        x75 = vector_percentile(nbin, xbin, 0.75);
        fprintf(fp, "%d %f %f %f %f %f %f %f\n", nbin, bsta, bsta + bwidth, xmin, x25, median, x75, xmax);
      }
      bsta += bwidth;
    }

    FREE(xbin);
    FREE(p);
  }

  FREE(dij);
  FREE(xij);
  FREE(dist_matrix);
  fclose(fp);
}
/*
 * Compute an ordering of the nodes of the square matrix A and report its norm.
 *
 * Steps:
 *  1. Symmetrize A and build a matrix L with, for every row i, -1 for each
 *     off-diagonal entry of the symmetrized pattern and the number of such
 *     entries on the diagonal.
 *  2. Run power_method on L (the original comment calls the result the
 *     largest eigenvector) and order the nodes by that vector with
 *     vector_ordering; the ordering is stored in *p.
 *  3. Repeatedly try swapping (*p)[i] and (*p)[j] for every pair i != j. A
 *     swap is kept when the smaller of the two local norms (first component
 *     of get_local_12_norm) after the swap exceeds the smaller one before it;
 *     otherwise it is undone. Passes repeat until one makes no swap.
 *
 * seed:   passed to power_method.
 * A:      square matrix (asserted A->m == A->n); not modified here.
 * p:      out; *p receives the ordering written by vector_ordering.
 * norm_1: out; first component of get_12_norm for the final ordering.
 */
void country_graph_coloring(int seed, SparseMatrix A, int **p, real *norm_1){
  int n = A->m, i, j, jj;
  SparseMatrix L, A2;
  int *ia, *ja;
  /* L is created as MATRIX_TYPE_REAL and its diagonal is added from a real,
     so the off-diagonal value must be a real too (it used to be an int).
     NOTE(review): assumes the add-entries routine reads a real through the
     value pointer for this matrix type - confirm against its declaration. */
  real a = -1.;
  real nrow;
  real *v = NULL;
  real norm1[2], norm2[2], norm11[2], norm22[2], norm = n;
  int pi, pj;/* permutation entries saved across a tentative swap */
  int improved = TRUE;

  assert(A->m == A->n);
  A2 = SparseMatrix_symmetrize(A, TRUE);
  ia = A2->ia; ja = A2->ja;

  /* Laplacian */
  L = SparseMatrix_new(n, n, 1, MATRIX_TYPE_REAL, FORMAT_COORD);
  for (i = 0; i < n; i++){
    nrow = 0.;
    for (j = ia[i]; j < ia[i+1]; j++){
      jj = ja[j];
      if (jj != i){
        nrow ++;
        L = SparseMatrix_coordinate_form_add_entries(L, 1, &i, &jj, &a);
      }
    }
    L = SparseMatrix_coordinate_form_add_entries(L, 1, &i, &i, &nrow);
  }
  L = SparseMatrix_from_coordinate_format(L);

  /* largest eigen vector */
  {
    int maxit = 100;
    real tol = 0.00001;
    power_method(L, seed, maxit, tol, &v);
  }

  vector_ordering(n, v, p, TRUE);

  /* swapping */
  while (improved){
    improved = FALSE; norm = n;
    for (i = 0; i < n; i++){
      get_local_12_norm(n, i, ia, ja, *p, norm1);
      for (j = 0; j < n; j++){
        if (j == i) continue;
        get_local_12_norm(n, j, ia, ja, *p, norm2);
        norm = MIN(norm, norm2[0]);/* only reported in the Verbose output */
        /* tentatively exchange the two entries */
        pi = (*p)[i]; pj = (*p)[j];
        (*p)[i] = pj;
        (*p)[j] = pi;
        get_local_12_norm(n, i, ia, ja, *p, norm11);
        get_local_12_norm(n, j, ia, ja, *p, norm22);
        if (MIN(norm11[0],norm22[0]) > MIN(norm1[0],norm2[0])){
          /* keep the swap; node i's local norm is now the post-swap one */
          improved = TRUE;
          norm1[0] = norm11[0];
          norm1[1] = norm11[1];
          continue;
        }
        /* no gain: undo the swap */
        (*p)[i] = pi;
        (*p)[j] = pj;
      }
    }
    if (Verbose) {
      get_12_norm(n, ia, ja, *p, norm1);
      fprintf(stderr, "norm = %f", norm);
      fprintf(stderr, "norm1 = %f, norm2 = %f\n", norm1[0], norm1[1]);
    }
  }
  get_12_norm(n, ia, ja, *p, norm1);

  *norm_1 = norm1[0];
  if (A2 != A) SparseMatrix_delete(A2);
  SparseMatrix_delete(L);
  /* v was NULL before power_method filled it in and no other reference to it
     exists here, so it is released in this function.
     NOTE(review): assumes power_method allocates *v for the caller - confirm. */
  FREE(v);
}