/*
 * Reads the matrix named by params->infstem and, for every row i, calls
 * gk_csr_GetSimilarRows() with row i as the query against the same matrix.
 * When params->outfile is set, each hit is written to that file as
 * "<query row> <hit value> <hit key>".
 *
 * The query loop is bracketed by gk_startwctimer/gk_stopwctimer on
 * params->timer_1.  All working arrays and the matrix are released before
 * returning.
 */
void ComputeNeighbors(params_t *params)
{
  int i, j, nhits;
  gk_csr_t *mat;
  int32_t *marker;
  gk_fkv_t *hits, *cand;
  FILE *fpout;

  printf("Reading data for %s...\n", params->infstem);

  mat = gk_csr_Read(params->infstem, GK_CSR_FMT_CSR, 1, 0);

  /* Explicit casts keep the arguments in agreement with the conversion
     specifiers whatever integer types the matrix fields are declared with
     (row offsets may be wider than int). */
  printf("#docs: %d, #nnz: %lld.\n", (int)mat->nrows,
      (long long)mat->rowptr[mat->nrows]);

  /* compact the column-space of the matrices */
  gk_csr_CompactColumns(mat);

  /* perform auxiliary normalizations/pre-computations based on similarity */
  gk_csr_Normalize(mat, GK_CSR_ROW, 2);

  /* create the inverted index */
  gk_csr_CreateIndex(mat, GK_CSR_COL);

  /* create the output file (only when one was requested) */
  fpout = (params->outfile
              ? gk_fopen(params->outfile, "w", "ComputeNeighbors: fpout")
              : NULL);

  /* allocate memory for the necessary working arrays */
  hits   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: hits");
  marker = gk_i32smalloc(mat->nrows, -1, "ComputeNeighbors: marker");
  cand   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: cand");

  /* find the best neighbors for each query document */
  gk_startwctimer(params->timer_1);

  for (i=0; i<mat->nrows; i++) {
    if (params->verbosity > 0)
      printf("Working on query %7d\n", i);

    /* find the neighbors of the ith document */
    nhits = gk_csr_GetSimilarRows(mat,
                mat->rowptr[i+1]-mat->rowptr[i],
                mat->rowind+mat->rowptr[i],
                mat->rowval+mat->rowptr[i],
                GK_CSR_COS, params->nnbrs, params->minsim,
                hits, marker, cand);

    /* write the results in the file */
    if (fpout) {
      for (j=0; j<nhits; j++)
        fprintf(fpout, "%8d %8lld %.3f\n", i,
            (long long)hits[j].val, (double)hits[j].key);
    }
  }

  gk_stopwctimer(params->timer_1);

  /* cleanup and exit */
  if (fpout)
    gk_fclose(fpout);

  gk_free((void **)&hits, &marker, &cand, LTERM);
  gk_csr_Free(&mat);
}
/*
 * Computes a best-first ordering of the vertices of 'graph', seeded at
 * vertex 'v'.
 *
 * Every vertex is placed in a priority queue with key 0 and the seed is
 * raised to key 1.  At step i the vertex returned by gk_i32pqGetTop() is
 * given order i, and the keys of its still-unordered neighbors are updated
 * according to 'type':
 *   1 - key set to 1 (labelled DFS),
 *   2 - number of already-ordered neighbors,
 *   3 - sum of the orders of the already-ordered neighbors,
 *   4 - sum of (i - order) over the already-ordered neighbors,
 *   other - keys are left unchanged.
 *
 * Outputs (each may be NULL when not wanted):
 *   r_perm  - receives perm,  where perm[x] is the order assigned to x.
 *   r_iperm - receives iperm, where iperm[perm[x]] == x.
 * A returned array is handed to the caller and is not freed here.
 *
 * Nothing is computed and the outputs are left untouched when
 * graph->nvtxs <= 0.
 */
void gk_graph_ComputeBestFOrdering0(gk_graph_t *graph, int v, int type,
          int32_t **r_perm, int32_t **r_iperm)
{
  ssize_t j, jj, *xadj;
  int i, k, u, nvtxs;
  int32_t *adjncy, *perm, *degrees, *minIDs, *open;
  gk_i32pq_t *queue;

  if (graph->nvtxs <= 0)
    return;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;

  /* the degree of the vertices in the closed list */
  degrees = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: degrees");

  /* the minimum vertex ID of an open vertex to the closed list */
  minIDs  = gk_i32smalloc(nvtxs, nvtxs+1, "gk_graph_ComputeBestFOrdering: minIDs");

  /* the open list */
  open    = gk_i32malloc(nvtxs, "gk_graph_ComputeBestFOrdering: open");

  /* if perm[i] >= 0, then perm[i] is the order of vertex i;
     otherwise perm[i] == -1. */
  perm    = gk_i32smalloc(nvtxs, -1, "gk_graph_ComputeBestFOrdering: perm");

  /* create the queue and put everything in it */
  queue = gk_i32pqCreate(nvtxs);
  for (i=0; i<nvtxs; i++)
    gk_i32pqInsert(queue, i, 0);
  gk_i32pqUpdate(queue, v, 1);

  open[0] = v;

  /* start processing the nodes */
  for (i=0; i<nvtxs; i++) {
    if ((v = gk_i32pqGetTop(queue)) == -1)
      gk_errexit(SIGERR, "The priority queue got empty ahead of time [i=%d].\n", i);
    if (perm[v] != -1)
      gk_errexit(SIGERR, "The perm[%d] has already been set.\n", v);
    perm[v] = i;

    for (j=xadj[v]; j<xadj[v+1]; j++) {
      u = adjncy[j];
      if (perm[u] == -1) {
        degrees[u]++;
        minIDs[u] = (i < minIDs[u] ? i : minIDs[u]);

        switch (type) {
          case 1: /* DFS */
            gk_i32pqUpdate(queue, u, 1);
            break;

          case 2: /* Max in closed degree */
            gk_i32pqUpdate(queue, u, degrees[u]);
            break;

          case 3: /* Sum of orders in closed list */
            for (k=0, jj=xadj[u]; jj<xadj[u+1]; jj++) {
              if (perm[adjncy[jj]] != -1)
                k += perm[adjncy[jj]];
            }
            gk_i32pqUpdate(queue, u, k);
            break;

          case 4: /* Sum of order-differences (w.r.t. current number) in
                     closed list (updated once in a while) */
            for (k=0, jj=xadj[u]; jj<xadj[u+1]; jj++) {
              if (perm[adjncy[jj]] != -1)
                k += (i-perm[adjncy[jj]]);
            }
            gk_i32pqUpdate(queue, u, k);
            break;

          default:
            ;
        }
      }
    }
  }

  /* time to decide what to return */

  /* Build iperm first: the loop reads perm[], which must still be owned
     here.  Handing perm to the caller (and clearing the local pointer)
     before this loop would dereference NULL when both outputs are
     requested. */
  if (r_iperm != NULL) {
    /* use the 'degrees' array to build the iperm array */
    for (i=0; i<nvtxs; i++)
      degrees[perm[i]] = i;
  }

  if (r_perm != NULL) {
    *r_perm = perm;
    perm = NULL;
  }

  if (r_iperm != NULL) {
    *r_iperm = degrees;
    degrees = NULL;
  }

  /* cleanup memory */
  gk_i32pqDestroy(queue);
  gk_free((void **)&perm, &degrees, &minIDs, &open, LTERM);
}
/*
 * Computes a best-first ordering of the vertices of 'graph', seeded at
 * vertex 'v'.  Unlike the queue-everything variant, only vertices adjacent
 * to an already-ordered vertex (the "open" list) are kept in the priority
 * queue; when the open list runs dry (disconnected graph) the first
 * remaining "todo" vertex is used as a new seed.
 *
 * At step i the vertex returned by gk_i32pqGetTop() is given order i and
 * the keys of its still-unordered neighbors are updated according to 'type':
 *   1 - 1000*(i+1) + #ordered neighbors            (labelled DFS),
 *   2 - #ordered neighbors,
 *   3 - running sum of the orders of ordered neighbors,
 *   4 - running sum of order differences; every open vertex accumulates
 *       its closed degree each step and the queue is refreshed when
 *       i < 1000 or i is a multiple of 25,
 *   5 - -(1000*level - #ordered neighbors)          (labelled BFS),
 *   6 - (i+1) * #ordered neighbors                  (hybrid of 1 and 2),
 *   other - keys are left at their insertion value.
 *
 * Outputs (each may be NULL when not wanted):
 *   r_perm  - receives perm,  where perm[x] is the order assigned to x.
 *   r_iperm - receives iperm, where iperm[perm[x]] == x.
 * A returned array is handed to the caller and is not freed here.
 *
 * Nothing is computed and the outputs are left untouched when
 * graph->nvtxs <= 0.
 */
void gk_graph_ComputeBestFOrdering(gk_graph_t *graph, int v, int type,
          int32_t **r_perm, int32_t **r_iperm)
{
  ssize_t j, jj, *xadj;
  int i, k, u, nvtxs, nopen, ntodo;
  int32_t *adjncy, *perm, *degrees, *wdegrees, *sod, *level, *ot, *pos;
  gk_i32pq_t *queue;

  if (graph->nvtxs <= 0)
    return;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;

  /* the degree of the vertices in the closed list */
  degrees  = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: degrees");

  /* the weighted degree of the vertices in the closed list for type==3 */
  wdegrees = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: wdegrees");

  /* the sum of differences for type==4 */
  sod      = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: sod");

  /* the encountering level of a vertex type==5 */
  level    = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: level");

  /* The open+todo list of vertices.
     The vertices from [0..nopen) are the open vertices.
     The vertices from [nopen..ntodo) are the todo vertices. */
  ot  = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_FindComponents: ot"));

  /* For a vertex that has not been explored, pos[i] is the position in
     the ot list. */
  pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_FindComponents: pos"));

  /* if perm[i] >= 0, then perm[i] is the order of vertex i;
     otherwise perm[i] == -1. */
  perm = gk_i32smalloc(nvtxs, -1, "gk_graph_ComputeBestFOrdering: perm");

  /* create the queue and put the starting vertex in it */
  queue = gk_i32pqCreate(nvtxs);
  gk_i32pqInsert(queue, v, 1);

  /* put v at the front of the open list (swap it with vertex 0) */
  pos[0] = ot[0] = v;
  pos[v] = ot[v] = 0;
  nopen = 1;
  ntodo = nvtxs;

  /* start processing the nodes */
  for (i=0; i<nvtxs; i++) {
    if (nopen == 0) { /* deal with non-connected graphs */
      gk_i32pqInsert(queue, ot[0], 1);
      nopen++;
    }

    if ((v = gk_i32pqGetTop(queue)) == -1)
      gk_errexit(SIGERR, "The priority queue got empty ahead of time [i=%d].\n", i);

    if (perm[v] != -1)
      gk_errexit(SIGERR, "The perm[%d] has already been set.\n", v);
    perm[v] = i;

    if (ot[pos[v]] != v)
      gk_errexit(SIGERR, "Something went wrong [ot[pos[%d]]!=%d.\n", v, v);
    if (pos[v] >= nopen)
      gk_errexit(SIGERR, "The position of v is not in open list. pos[%d]=%d is >=%d.\n", v, pos[v], nopen);

    /* remove v from the open list and re-arrange the todo part of the list */
    ot[pos[v]] = ot[nopen-1];
    pos[ot[nopen-1]] = pos[v];
    if (ntodo > nopen) {
      ot[nopen-1] = ot[ntodo-1];
      pos[ot[ntodo-1]] = nopen-1;
    }
    nopen--;
    ntodo--;

    for (j=xadj[v]; j<xadj[v+1]; j++) {
      u = adjncy[j];
      if (perm[u] == -1) {
        /* update ot list, if u is not in the open list by putting it at
           the end of the open list. */
        if (degrees[u] == 0) {
          ot[pos[u]] = ot[nopen];
          pos[ot[nopen]] = pos[u];
          ot[nopen] = u;
          pos[u] = nopen;
          nopen++;

          level[u] = level[v]+1;
          gk_i32pqInsert(queue, u, 0);
        }

        /* update the in-closed degree */
        degrees[u]++;

        /* update the queues based on the type */
        switch (type) {
          case 1: /* DFS */
            gk_i32pqUpdate(queue, u, 1000*(i+1)+degrees[u]);
            break;

          case 2: /* Max in closed degree */
            gk_i32pqUpdate(queue, u, degrees[u]);
            break;

          case 3: /* Sum of orders in closed list */
            wdegrees[u] += i;
            gk_i32pqUpdate(queue, u, wdegrees[u]);
            break;

          case 4: /* Sum of order-differences */
            /* this is handled at the end of the loop */
            ;
            break;

          case 5: /* BFS with in degree priority */
            gk_i32pqUpdate(queue, u, -(1000*level[u] - degrees[u]));
            break;

          case 6: /* Hybrid of 1+2 */
            gk_i32pqUpdate(queue, u, (i+1)*degrees[u]);
            break;

          default:
            ;
        }
      }
    }

    if (type == 4) { /* update all the vertices in the open list */
      for (j=0; j<nopen; j++) {
        u = ot[j];
        if (perm[u] != -1)
          gk_errexit(SIGERR, "For i=%d, the open list contains a closed vertex: ot[%zd]=%d, perm[%d]=%d.\n", i, j, u, u, perm[u]);
        sod[u] += degrees[u];
        if (i<1000 || i%25==0)
          gk_i32pqUpdate(queue, u, sod[u]);
      }
    }

    /*
    for (j=0; j<ntodo; j++) {
      if (pos[ot[j]] != j)
        gk_errexit(SIGERR, "pos[ot[%zd]] != %zd.\n", j, j);
    }
    */
  }

  /* time to decide what to return */

  /* Build iperm first: the loop reads perm[], which must still be owned
     here.  Handing perm to the caller (and clearing the local pointer)
     before this loop would dereference NULL when both outputs are
     requested. */
  if (r_iperm != NULL) {
    /* use the 'degrees' array to build the iperm array */
    for (i=0; i<nvtxs; i++)
      degrees[perm[i]] = i;
  }

  if (r_perm != NULL) {
    *r_perm = perm;
    perm = NULL;
  }

  if (r_iperm != NULL) {
    *r_iperm = degrees;
    degrees = NULL;
  }

  /* cleanup memory */
  gk_i32pqDestroy(queue);
  gk_free((void **)&perm, &degrees, &wdegrees, &sod, &ot, &pos, &level, LTERM);
}
/*
 * Computes single-source shortest path lengths from vertex 'v'.
 *
 * The arithmetic type follows the edge weights that are present:
 *   - graph->iadjwgt != NULL : int32_t weights, *r_sps receives an int32_t
 *     array of length nvtxs;
 *   - otherwise              : graph->fadjwgt is used, *r_sps receives a
 *     float array of length nvtxs.
 * In both cases sps[v] == 0 and vertices that were never reached keep the
 * initial value -1.  The array is handed to the caller and not freed here.
 *
 * A negative sps[] entry doubles as the "not reached yet" marker, so the
 * routine presumably expects non-negative edge weights -- TODO confirm.
 *
 * Nothing is computed and *r_sps is left untouched when graph->nvtxs <= 0.
 */
void gk_graph_SingleSourceShortestPaths(gk_graph_t *graph, int v, void **r_sps)
{
  ssize_t j, *xadj;   /* edge offsets are ssize_t; do not narrow them to int */
  int u, nvtxs;
  int32_t *adjncy, *inqueue;

  if (graph->nvtxs <= 0)
    return;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;

  /* per-vertex state: 0 = never queued, 1 = in the queue, 2 = finalized */
  inqueue = gk_i32smalloc(nvtxs, 0, "gk_graph_SingleSourceShortestPaths: inqueue");

  /* determine if you will be computing using int32_t or float and proceed
     from there */
  if (graph->iadjwgt != NULL) {
    gk_i32pq_t *queue;
    int32_t *adjwgt;
    int32_t *sps;

    adjwgt = graph->iadjwgt;

    queue = gk_i32pqCreate(nvtxs);
    gk_i32pqInsert(queue, v, 0);
    inqueue[v] = 1;

    sps = gk_i32smalloc(nvtxs, -1, "gk_graph_SingleSourceShortestPaths: sps");
    sps[v] = 0;

    /* start processing the nodes */
    while ((v = gk_i32pqGetTop(queue)) != -1) {
      inqueue[v] = 2;

      /* relax the adjacent edges */
      for (j=xadj[v]; j<xadj[v+1]; j++) {
        u = adjncy[j];
        if (inqueue[u] == 2)
          continue;

        if (sps[u] < 0 || sps[v]+adjwgt[j] < sps[u]) {
          sps[u] = sps[v]+adjwgt[j];

          /* distances are stored negated as queue keys */
          if (inqueue[u])
            gk_i32pqUpdate(queue, u, -sps[u]);
          else {
            gk_i32pqInsert(queue, u, -sps[u]);
            inqueue[u] = 1;
          }
        }
      }
    }

    *r_sps = (void *)sps;

    gk_i32pqDestroy(queue);
  }
  else {
    gk_fpq_t *queue;
    float *adjwgt;
    float *sps;

    adjwgt = graph->fadjwgt;

    queue = gk_fpqCreate(nvtxs);
    gk_fpqInsert(queue, v, 0);
    inqueue[v] = 1;

    sps = gk_fsmalloc(nvtxs, -1, "gk_graph_SingleSourceShortestPaths: sps");
    sps[v] = 0;

    /* start processing the nodes */
    while ((v = gk_fpqGetTop(queue)) != -1) {
      inqueue[v] = 2;

      /* relax the adjacent edges */
      for (j=xadj[v]; j<xadj[v+1]; j++) {
        u = adjncy[j];
        if (inqueue[u] == 2)
          continue;

        if (sps[u] < 0 || sps[v]+adjwgt[j] < sps[u]) {
          sps[u] = sps[v]+adjwgt[j];

          /* distances are stored negated as queue keys */
          if (inqueue[u])
            gk_fpqUpdate(queue, u, -sps[u]);
          else {
            gk_fpqInsert(queue, u, -sps[u]);
            inqueue[u] = 1;
          }
        }
      }
    }

    *r_sps = (void *)sps;

    gk_fpqDestroy(queue);
  }

  gk_free((void **)&inqueue, LTERM);
}