Beispiel #1
0
/*
 * Finds, for every row of a CSR matrix, its most similar rows and optionally
 * writes them to a file.
 *
 * Steps, in order:
 *   1. read the matrix named by params->infstem (GK_CSR_FMT_CSR),
 *   2. compact its column space, normalize its rows
 *      (gk_csr_Normalize(mat, GK_CSR_ROW, 2)) and build the column index,
 *   3. use each row i as a query to gk_csr_GetSimilarRows() with GK_CSR_COS,
 *      params->nnbrs and params->minsim,
 *   4. if params->outfile is set, write one "<query> <hit.val> <hit.key>"
 *      line per returned hit.
 *
 * The query loop is bracketed by gk_startwctimer/gk_stopwctimer on
 * params->timer_1.  When params->verbosity > 0 a progress line is printed
 * for every query.
 *
 * params: run parameters; the fields read here are infstem, outfile, nnbrs,
 *         minsim, verbosity and timer_1.
 */
void ComputeNeighbors(params_t *params)
{
  int i, j, nhits;
  gk_csr_t *mat;
  int32_t *marker;
  gk_fkv_t *hits, *cand;
  FILE *fpout;

  printf("Reading data for %s...\n", params->infstem);

  mat = gk_csr_Read(params->infstem, GK_CSR_FMT_CSR, 1, 0);

  /* The row-pointer entries may be wider than int (NOTE(review): ssize_t in
     GKlib -- confirm), so cast explicitly to keep the argument in agreement
     with the conversion specifier. */
  printf("#docs: %d, #nnz: %lld.\n", mat->nrows, (long long)mat->rowptr[mat->nrows]);

  /* compact the column-space of the matrices */
  gk_csr_CompactColumns(mat);

  /* perform auxiliary normalizations/pre-computations based on similarity */
  gk_csr_Normalize(mat, GK_CSR_ROW, 2);

  /* create the inverted index */
  gk_csr_CreateIndex(mat, GK_CSR_COL);

  /* create the output file (no file is written when outfile is not set) */
  fpout = (params->outfile ? gk_fopen(params->outfile, "w", "ComputeNeighbors: fpout") : NULL);

  /* allocate the working arrays; each is sized by the number of rows */
  hits   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: hits");
  marker = gk_i32smalloc(mat->nrows, -1, "ComputeNeighbors: marker");
  cand   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: cand");


  /* find the best neighbors for each query document */
  gk_startwctimer(params->timer_1);
  for (i=0; i<mat->nrows; i++) {
    if (params->verbosity > 0)
      printf("Working on query %7d\n", i);

    /* find the neighbors of the ith document; the query is row i itself,
       passed as (length, indices, values) slices of the CSR arrays */
    nhits = gk_csr_GetSimilarRows(mat,
                 mat->rowptr[i+1]-mat->rowptr[i],
                 mat->rowind+mat->rowptr[i],
                 mat->rowval+mat->rowptr[i],
                 GK_CSR_COS, params->nnbrs, params->minsim, hits,
                 marker, cand);

    /* write the results in the file */
    if (fpout) {
      /* hits[j].val is cast for the same reason as the nnz count above */
      for (j=0; j<nhits; j++)
        fprintf(fpout, "%8d %8lld %.3f\n", i, (long long)hits[j].val, hits[j].key);
    }
  }
  gk_stopwctimer(params->timer_1);


  /* cleanup and exit */
  if (fpout) gk_fclose(fpout);

  gk_free((void **)&hits, &marker, &cand, LTERM);

  gk_csr_Free(&mat);

  return;
}
/*
 * Computes a best-first ordering of the vertices of a graph.
 *
 * All vertices are placed in a priority queue with key 0 and the start
 * vertex v is raised to key 1.  The loop then repeatedly takes the vertex
 * returned by gk_i32pqGetTop(), assigns it the next order number, and
 * re-prioritizes its still-unordered neighbors according to 'type':
 *
 *   1  key = 1
 *   2  key = number of already-ordered neighbors
 *   3  key = sum of the order numbers of the already-ordered neighbors
 *   4  key = sum of (current order - order) over already-ordered neighbors
 *   any other value leaves the neighbor keys untouched.
 *
 * graph   : graph to order; nvtxs, xadj and adjncy are read.
 * v       : starting vertex.
 * type    : priority rule, see above.
 * r_perm  : if non-NULL, receives a newly allocated array with
 *           perm[vertex] = order.  Ownership passes to the caller.
 * r_iperm : if non-NULL, receives a newly allocated array with
 *           iperm[order] = vertex.  Ownership passes to the caller.
 *
 * If graph->nvtxs <= 0 the function returns without writing to r_perm or
 * r_iperm.  Internal inconsistencies are reported through gk_errexit().
 */
void gk_graph_ComputeBestFOrdering0(gk_graph_t *graph, int v, int type, 
          int32_t **r_perm, int32_t **r_iperm)
{
  ssize_t j, jj, *xadj;
  int i, k, u, nvtxs;
  int32_t *adjncy, *perm, *degrees;
  gk_i32pq_t *queue;

  if (graph->nvtxs <= 0)
    return;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;

  /* degrees[u]: number of neighbors of u that have already been ordered */
  degrees = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: degrees");

  /* if perm[i] >= 0, then perm[i] is the order of vertex i; 
     otherwise perm[i] == -1.
  */
  perm = gk_i32smalloc(nvtxs, -1, "gk_graph_ComputeBestFOrdering: perm");

  /* create the queue and put everything in it; v gets a higher key than
     every other vertex */
  queue = gk_i32pqCreate(nvtxs);
  for (i=0; i<nvtxs; i++)
    gk_i32pqInsert(queue, i, 0);
  gk_i32pqUpdate(queue, v, 1);

  /* start processing the nodes */
  for (i=0; i<nvtxs; i++) {
    if ((v = gk_i32pqGetTop(queue)) == -1) 
      gk_errexit(SIGERR, "The priority queue got empty ahead of time [i=%d].\n", i);
    if (perm[v] != -1)
      gk_errexit(SIGERR, "The perm[%d] has already been set.\n", v);
    perm[v] = i;

    /* re-prioritize the neighbors of v that have not been ordered yet */
    for (j=xadj[v]; j<xadj[v+1]; j++) {
      u = adjncy[j];
      if (perm[u] == -1) {
        degrees[u]++;

        switch (type) {
          case 1: /* DFS */
            gk_i32pqUpdate(queue, u, 1);
            break;
          case 2: /* Max in closed degree */
            gk_i32pqUpdate(queue, u, degrees[u]);
            break;
          case 3: /* Sum of orders in closed list */
            for (k=0, jj=xadj[u]; jj<xadj[u+1]; jj++) {
              if (perm[adjncy[jj]] != -1)
                k += perm[adjncy[jj]];
            }
            gk_i32pqUpdate(queue, u, k);
            break;
          case 4: /* Sum of order-differences (w.r.t. current number) in closed 
                     list (updated once in a while) */
            for (k=0, jj=xadj[u]; jj<xadj[u+1]; jj++) {
              if (perm[adjncy[jj]] != -1)
                k += (i-perm[adjncy[jj]]);
            }
            gk_i32pqUpdate(queue, u, k);
            break;
          default:
            ;
        }
      }
    }
  }


  /* time to decide what to return */

  /* The inverse permutation is derived from perm, so it has to be built
     while perm is still owned here, i.e. before perm is handed to the
     caller and the local pointer is cleared. */
  if (r_iperm != NULL) {
    /* reuse the 'degrees' array to hold the iperm array */
    for (i=0; i<nvtxs; i++)
      degrees[perm[i]] = i;

    *r_iperm = degrees;
    degrees = NULL;
  }

  if (r_perm != NULL) {
    *r_perm = perm;
    perm = NULL;
  }


  /* cleanup memory; pointers that were handed to the caller are NULL here */
  gk_i32pqDestroy(queue);
  gk_free((void **)&perm, &degrees, LTERM);

}
/*
 * Computes a best-first ordering of the vertices of a graph, keeping only
 * the currently reachable ("open") vertices in the priority queue.
 *
 * The start vertex v is inserted first.  Each iteration takes the vertex
 * returned by gk_i32pqGetTop(), gives it the next order number, moves its
 * not-yet-seen neighbors into the open list and the queue, and updates the
 * keys of its unordered neighbors according to 'type':
 *
 *   1  key = 1000*(i+1) + degrees[u]
 *   2  key = degrees[u]
 *   3  key = running sum of the orders of u's ordered neighbors
 *   4  key = sod[u], accumulated for every open vertex after each step and
 *            pushed to the queue when i < 1000 or i is a multiple of 25
 *   5  key = -(1000*level[u] - degrees[u])
 *   6  key = (i+1)*degrees[u]
 *   any other value leaves the keys at their insertion value.
 *
 * Here i is the order number being assigned, degrees[u] is the number of
 * already-ordered neighbors of u and level[u] is level[parent]+1 of the
 * vertex that first reached u.  When the open list runs empty (the graph is
 * not connected) the first vertex of the todo list is used to restart.
 *
 * graph   : graph to order; nvtxs, xadj and adjncy are read.
 * v       : starting vertex.
 * type    : priority rule, see above.
 * r_perm  : if non-NULL, receives a newly allocated array with
 *           perm[vertex] = order.  Ownership passes to the caller.
 * r_iperm : if non-NULL, receives a newly allocated array with
 *           iperm[order] = vertex.  Ownership passes to the caller.
 *
 * If graph->nvtxs <= 0 the function returns without writing to r_perm or
 * r_iperm.  Internal inconsistencies are reported through gk_errexit().
 */
void gk_graph_ComputeBestFOrdering(gk_graph_t *graph, int v, int type, 
          int32_t **r_perm, int32_t **r_iperm)
{
  ssize_t j, jj, *xadj;
  int i, k, u, nvtxs, nopen, ntodo;
  int32_t *adjncy, *perm, *degrees, *wdegrees, *sod, *level, *ot, *pos;
  gk_i32pq_t *queue;

  if (graph->nvtxs <= 0)
    return;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;

  /* the degree of the vertices in the closed list */
  degrees = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: degrees");

  /* the weighted degree of the vertices in the closed list for type==3 */
  wdegrees = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: wdegrees");

  /* the sum of differences for type==4 */
  sod = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: sod");

  /* the encountering level of a vertex type==5 */
  level = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: level");

  /* The open+todo list of vertices. 
     The vertices from [0..nopen) are the open vertices.
     The vertices from [nopen..ntodo) are the todo vertices.
     */
  ot = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_ComputeBestFOrdering: ot"));

  /* For a vertex that has not been explored, pos[i] is the position in the ot list. */
  pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_ComputeBestFOrdering: pos"));

  /* if perm[i] >= 0, then perm[i] is the order of vertex i; otherwise perm[i] == -1. */
  perm = gk_i32smalloc(nvtxs, -1, "gk_graph_ComputeBestFOrdering: perm");

  /* create the queue and put the starting vertex in it */
  queue = gk_i32pqCreate(nvtxs);
  gk_i32pqInsert(queue, v, 1);

  /* put v at the front of the open list by swapping it with vertex 0 */
  pos[0] = ot[0] = v;
  pos[v] = ot[v] = 0;
  nopen = 1;
  ntodo = nvtxs;

  /* start processing the nodes */
  for (i=0; i<nvtxs; i++) {
    if (nopen == 0) { /* deal with non-connected graphs */
      gk_i32pqInsert(queue, ot[0], 1);  
      nopen++;
    }

    if ((v = gk_i32pqGetTop(queue)) == -1)
      gk_errexit(SIGERR, "The priority queue got empty ahead of time [i=%d].\n", i);

    if (perm[v] != -1)
      gk_errexit(SIGERR, "The perm[%d] has already been set.\n", v);
    perm[v] = i;

    /* sanity checks: ot and pos must agree, and v must be an open vertex */
    if (ot[pos[v]] != v)
      gk_errexit(SIGERR, "Something went wrong [ot[pos[%d]]!=%d.\n", v, v);
    if (pos[v] >= nopen)
      gk_errexit(SIGERR, "The position of v is not in open list. pos[%d]=%d is >=%d.\n", v, pos[v], nopen);

    /* remove v from the open list and re-arrange the todo part of the list:
       the last open vertex fills v's slot, and the last todo vertex fills
       the slot freed at the end of the open range */
    ot[pos[v]]       = ot[nopen-1];
    pos[ot[nopen-1]] = pos[v];
    if (ntodo > nopen) {
      ot[nopen-1]      = ot[ntodo-1];
      pos[ot[ntodo-1]] = nopen-1;
    }
    nopen--;
    ntodo--;

    for (j=xadj[v]; j<xadj[v+1]; j++) {
      u = adjncy[j];
      if (perm[u] == -1) {
        /* update ot list, if u is not in the open list by putting it at the end
           of the open list. */
        if (degrees[u] == 0) {
          ot[pos[u]]     = ot[nopen];
          pos[ot[nopen]] = pos[u];
          ot[nopen]      = u;
          pos[u]         = nopen;
          nopen++;

          level[u] = level[v]+1;
          gk_i32pqInsert(queue, u, 0);  
        }


        /* update the in-closed degree */
        degrees[u]++;

        /* update the queues based on the type */
        switch (type) {
          case 1: /* DFS */
            gk_i32pqUpdate(queue, u, 1000*(i+1)+degrees[u]);
            break;

          case 2: /* Max in closed degree */
            gk_i32pqUpdate(queue, u, degrees[u]);
            break;

          case 3: /* Sum of orders in closed list */
            wdegrees[u] += i;
            gk_i32pqUpdate(queue, u, wdegrees[u]);
            break;

          case 4: /* Sum of order-differences */
            /* this is handled at the end of the loop */
            ;
            break;

          case 5: /* BFS with in degree priority */
            gk_i32pqUpdate(queue, u, -(1000*level[u] - degrees[u]));
            break;

          case 6: /* Hybrid of 1+2 */
            gk_i32pqUpdate(queue, u, (i+1)*degrees[u]);
            break;

          default:
            ;
        }
      }
    }

    if (type == 4) { /* update all the vertices in the open list */
      for (j=0; j<nopen; j++) {
        u = ot[j];
        if (perm[u] != -1)
          gk_errexit(SIGERR, "For i=%d, the open list contains a closed vertex: ot[%zd]=%d, perm[%d]=%d.\n", i, j, u, u, perm[u]);
        sod[u] += degrees[u];
        /* the queue is refreshed on every step early on, then only on
           every 25th step */
        if (i<1000 || i%25==0)
          gk_i32pqUpdate(queue, u, sod[u]);
      }
    }

    /* disabled consistency check of the ot/pos arrays:
    for (j=0; j<ntodo; j++) {
      if (pos[ot[j]] != j)
        gk_errexit(SIGERR, "pos[ot[%zd]] != %zd.\n", j, j);
    }
    */

  }


  /* time to decide what to return */

  /* The inverse permutation is derived from perm, so it has to be built
     while perm is still owned here, i.e. before perm is handed to the
     caller and the local pointer is cleared. */
  if (r_iperm != NULL) {
    /* reuse the 'degrees' array to hold the iperm array */
    for (i=0; i<nvtxs; i++)
      degrees[perm[i]] = i;

    *r_iperm = degrees;
    degrees = NULL;
  }

  if (r_perm != NULL) {
    *r_perm = perm;
    perm = NULL;
  }


  /* cleanup memory; pointers that were handed to the caller are NULL here */
  gk_i32pqDestroy(queue);
  gk_free((void **)&perm, &degrees, &wdegrees, &sod, &ot, &pos, &level, LTERM);

}
/*
 * Computes the lengths of the shortest paths from vertex v to every other
 * vertex, using a priority queue keyed by the negated tentative distance.
 *
 * graph : graph to search; nvtxs, xadj, adjncy and one of iadjwgt/fadjwgt
 *         are read.  If graph->iadjwgt is non-NULL the computation is done
 *         in int32_t using iadjwgt; otherwise it is done in float using
 *         fadjwgt.
 *         NOTE(review): fadjwgt is not checked for NULL, so a graph without
 *         any edge weights appears to be unsupported -- confirm with callers.
 * v     : source vertex.
 * r_sps : receives a newly allocated array of nvtxs distances, of type
 *         int32_t* or float* to match the weights that were used.
 *         sps[v] is 0 and vertices that were never reached keep the value
 *         -1.  Ownership passes to the caller.
 *
 * A negative sps entry is treated as "no distance yet", so the routine
 * relies on path lengths being non-negative.
 *
 * If graph->nvtxs <= 0 the function returns without writing to r_sps.
 */
void gk_graph_SingleSourceShortestPaths(gk_graph_t *graph, int v, void **r_sps)
{
  ssize_t i, *xadj;   /* i indexes adjncy/adjwgt, so it has the type of the xadj offsets */
  int u, nvtxs;
  int32_t *adjncy, *inqueue;

  if (graph->nvtxs <= 0)
    return;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;

  /* inqueue[u]: 0 = never queued, 1 = currently queued, 2 = finalized */
  inqueue = gk_i32smalloc(nvtxs, 0, "gk_graph_SingleSourceShortestPaths: inqueue");

  /* determine if you will be computing using int32_t or float and proceed from there */
  if (graph->iadjwgt != NULL) {
    gk_i32pq_t *queue;
    int32_t *adjwgt;
    int32_t *sps;

    adjwgt = graph->iadjwgt;

    queue = gk_i32pqCreate(nvtxs);
    gk_i32pqInsert(queue, v, 0);
    inqueue[v] = 1;

    sps = gk_i32smalloc(nvtxs, -1, "gk_graph_SingleSourceShortestPaths: sps");
    sps[v] = 0;

    /* start processing the nodes */
    while ((v = gk_i32pqGetTop(queue)) != -1) {
      inqueue[v] = 2;

      /* relax the adjacent edges */
      for (i=xadj[v]; i<xadj[v+1]; i++) {
        u = adjncy[i];
        if (inqueue[u] == 2)
          continue;

        /* first time u is reached, or a shorter path through v was found */
        if (sps[u] < 0 || sps[v]+adjwgt[i] < sps[u]) {
          sps[u] = sps[v]+adjwgt[i];

          /* keys are negated distances */
          if (inqueue[u])
            gk_i32pqUpdate(queue, u, -sps[u]);
          else {
            gk_i32pqInsert(queue, u, -sps[u]);
            inqueue[u] = 1;
          }
        }
      }
    }

    *r_sps = (void *)sps;

    gk_i32pqDestroy(queue);
  }
  else {
    gk_fpq_t *queue;
    float *adjwgt;
    float *sps;

    adjwgt = graph->fadjwgt;

    queue = gk_fpqCreate(nvtxs);
    gk_fpqInsert(queue, v, 0);
    inqueue[v] = 1;

    sps = gk_fsmalloc(nvtxs, -1, "gk_graph_SingleSourceShortestPaths: sps");
    sps[v] = 0;

    /* start processing the nodes */
    while ((v = gk_fpqGetTop(queue)) != -1) {
      inqueue[v] = 2;

      /* relax the adjacent edges */
      for (i=xadj[v]; i<xadj[v+1]; i++) {
        u = adjncy[i];
        if (inqueue[u] == 2)
          continue;

        /* first time u is reached, or a shorter path through v was found */
        if (sps[u] < 0 || sps[v]+adjwgt[i] < sps[u]) {
          sps[u] = sps[v]+adjwgt[i];

          /* keys are negated distances */
          if (inqueue[u])
            gk_fpqUpdate(queue, u, -sps[u]);
          else {
            gk_fpqInsert(queue, u, -sps[u]);
            inqueue[u] = 1;
          }
        }
      }
    }

    *r_sps = (void *)sps;

    gk_fpqDestroy(queue);
  }

  gk_free((void **)&inqueue, LTERM);

}