Example #1
0
/*************************************************************************
* This function writes weighted  graph into a file
**************************************************************************/
void WriteWgtGraph(char *filename, idxtype nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt)
{
  idxtype i, j;
  FILE *fpout;

  fpout = gk_fopen(filename, "w", __func__);

  mfprintf(fpout, "%D %D", nvtxs, xadj[nvtxs]/2);
  mfprintf(fpout, " %D", 10);

  for (i=0; i<nvtxs; i++) {
    fprintf(fpout, "\n");
    fprintf(fpout, "%" PRIIDX, vwgt[i]);
    
    for (j=xadj[i]; j<xadj[i+1]; j++)
      fprintf(fpout, " %" PRIIDX, adjncy[j]+1);
  }

  gk_fclose(fpout);
}
Example #2
0
/************************************************************************
* This function reads the element node array of a i Mixed mesh
**************************************************************************/
idxtype *ReadMgcnums(char *filename)
{
  idxtype i;
  idxtype *mgc;
  FILE *fpin;

  fpin = gk_fopen(filename, "r", __func__);

  mgc = idxmalloc(36, "Readmgcnums: mgcnums");

  for (i=0; i<36; i++) {
    if (i<6 || i%6==0) 
      mgc[i]=-1;
    else 
      mfscanf(fpin, "%D", mgc+i);
  }

  gk_fclose(fpin);

  return mgc;
}
Example #3
0
/*************************************************************************
* This function reads the spd matrix
**************************************************************************/
void ReadCoordinates(GraphType *graph, char *filename)
{
  idxtype i, j, k, l, nvtxs, fmt, readew, readvw, ncon, edge, ewgt;
  FILE *fpin;
  char *line;


  fpin = gk_fopen(filename, "r", __func__);

  nvtxs = graph->nvtxs;

  graph->coords = gk_dsmalloc(3*nvtxs, 0.0, "ReadCoordinates: coords");

  line = gk_cmalloc(MAXLINE+1, "ReadCoordinates: line");

  for (i=0; i<nvtxs; i++) {
    fgets(line, MAXLINE, fpin);
    msscanf(line, "%lf %lf %lf", graph->coords+3*i+0, graph->coords+3*i+1, graph->coords+3*i+2);
  }
    
  gk_fclose(fpin);

  gk_free((void **)&line, LTERM);
}
gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename)
{
    gk_seq_t *seq;
    gk_idx_t i, j, ii;
    size_t ntokens, nbytes, len;
    FILE *fpin;
    
    
    gk_Tokens_t tokens;
    static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*";
    static int PSSMWIDTH = 20;
    char *header, line[MAXLINELEN];
    gk_i2cc2i_t *converter;

    header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header");
    
    converter = gk_i2cc2i_create_common(AAORDER);
    
    gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes);
    len --;

    seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM");
    gk_seq_init(seq);
    
    seq->len = len;
    seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM");
    seq->pssm     = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
    seq->psfm     = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
    
    seq->nsymbols = PSSMWIDTH;
    seq->name     = gk_getbasename(filename);
    
    fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM");


    /* Read the header line */
    if (fgets(line, MAXLINELEN-1, fpin) == NULL)
      errexit("Unexpected end of file: %s\n", filename);
    gk_strtoupper(line);
    gk_strtokenize(line, " \t\n", &tokens);

    for (i=0; i<PSSMWIDTH; i++)
	header[i] = tokens.list[i][0];
    
    gk_freetokenslist(&tokens);
    

    /* Read the rest of the lines */
    for (i=0, ii=0; ii<len; ii++) {
	if (fgets(line, MAXLINELEN-1, fpin) == NULL)
          errexit("Unexpected end of file: %s\n", filename);
	gk_strtoupper(line);
	gk_strtokenize(line, " \t\n", &tokens);
	
	seq->sequence[i] = converter->c2i[(int)tokens.list[1][0]];
	
	for (j=0; j<PSSMWIDTH; j++) {
	    seq->pssm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+j]);
	    seq->psfm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]);
	}
	
      
	
	gk_freetokenslist(&tokens);
	i++;
    }
    
    seq->len = i; /* Reset the length if certain characters were skipped */
    
    gk_free((void **)&header, LTERM);
    gk_fclose(fpin);

    return seq;
}
Example #5
0
File: sd.c Project: shizunge/XU_1
void ComputeNeighbors(params_t *params)
{
  int i, j, nhits;
  gk_csr_t *mat;
  int32_t *marker;
  gk_fkv_t *hits, *cand;
  FILE *fpout;

  printf("Reading data for %s...\n", params->infstem);

  mat = gk_csr_Read(params->infstem, GK_CSR_FMT_CSR, 1, 0);

  printf("#docs: %d, #nnz: %d.\n", mat->nrows, mat->rowptr[mat->nrows]);

  /* compact the column-space of the matrices */
  gk_csr_CompactColumns(mat);

  /* perform auxiliary normalizations/pre-computations based on similarity */
  gk_csr_Normalize(mat, GK_CSR_ROW, 2);

  /* create the inverted index */
  gk_csr_CreateIndex(mat, GK_CSR_COL);

  /* create the output file */
  fpout = (params->outfile ? gk_fopen(params->outfile, "w", "ComputeNeighbors: fpout") : NULL);

  /* allocate memory for the necessary working arrays */
  hits   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: hits");
  marker = gk_i32smalloc(mat->nrows, -1, "ComputeNeighbors: marker");
  cand   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: cand");


  /* find the best neighbors for each query document */
  gk_startwctimer(params->timer_1);
  for (i=0; i<mat->nrows; i++) {
    if (params->verbosity > 0)
      printf("Working on query %7d\n", i);

    /* find the neighbors of the ith document */ 
    nhits = gk_csr_GetSimilarRows(mat, 
                 mat->rowptr[i+1]-mat->rowptr[i], 
                 mat->rowind+mat->rowptr[i], 
                 mat->rowval+mat->rowptr[i], 
                 GK_CSR_COS, params->nnbrs, params->minsim, hits, 
                 marker, cand);

    /* write the results in the file */
    if (fpout) {
      for (j=0; j<nhits; j++) 
        fprintf(fpout, "%8d %8d %.3f\n", i, hits[j].val, hits[j].key);
    }
  }
  gk_stopwctimer(params->timer_1);


  /* cleanup and exit */
  if (fpout) gk_fclose(fpout);

  gk_free((void **)&hits, &marker, &cand, LTERM);

  gk_csr_Free(&mat);

  return;
}
Example #6
0
/*************************************************************************
* This function reads the spd matrix
**************************************************************************/
void ReadGraph(GraphType *graph, char *filename, idxtype *wgtflag)
{
  idxtype i, j, k, l, fmt, readew, readvw, ncon, edge, ewgt;
  idxtype *xadj, *adjncy, *vwgt, *adjwgt;
  char *line, *oldstr, *newstr;
  FILE *fpin;

  InitGraph(graph);

  line = gk_cmalloc(MAXLINE+1, "ReadGraph: line");

  fpin = gk_fopen(filename, "r", __func__);

  do {
    fgets(line, MAXLINE, fpin);
  } while (line[0] == '%' && !feof(fpin));

  if (feof(fpin)) {
    graph->nvtxs = 0;
    gk_free((void **)&line, LTERM);
    return;
  }

  fmt = ncon = 0;
  msscanf(line, "%D %D %D %D", &(graph->nvtxs), &(graph->nedges), &fmt, &ncon);


  readew = (fmt%10 > 0);
  readvw = ((fmt/10)%10 > 0);
  if (fmt >= 100) {
    mprintf("Cannot read this type of file format!");
    exit(0);
  }


  *wgtflag = 0;
  if (readew)
    *wgtflag += 1;
  if (readvw)
    *wgtflag += 2;

  if (ncon > 0 && !readvw) {
    mprintf("------------------------------------------------------------------------------\n");
    mprintf("***  I detected an error in your input file  ***\n\n");
    mprintf("You specified ncon=%D, but the fmt parameter does not specify vertex weights\n", ncon);
    mprintf("Make sure that the fmt parameter is set to either 10 or 11.\n");
    mprintf("------------------------------------------------------------------------------\n");
    exit(0);
  }

  graph->nedges *=2;
  ncon = graph->ncon = (ncon == 0 ? 1 : ncon);

  /*mprintf("%D %D %D %D %D [%D %D]\n", fmt, fmt%10, (fmt/10)%10, ncon, graph->ncon, readew, readvw);*/

  if (graph->nvtxs > MAXIDX) 
    errexit("\nThe matrix is too big: %d [%d %d]\n", graph->nvtxs, MAXIDX, sizeof(idxtype));

  xadj = graph->xadj = idxsmalloc(graph->nvtxs+1, 0, "ReadGraph: xadj");
  adjncy = graph->adjncy = idxmalloc(graph->nedges, "ReadGraph: adjncy");

  vwgt = graph->vwgt = (readvw ? idxmalloc(ncon*graph->nvtxs, "ReadGraph: vwgt") : NULL);
  adjwgt = graph->adjwgt = (readew ? idxmalloc(graph->nedges, "ReadGraph: adjwgt") : NULL);

  /* Start reading the graph file */
  for (xadj[0]=0, k=0, i=0; i<graph->nvtxs; i++) {
    do {
      fgets(line, MAXLINE, fpin);
    } while (line[0] == '%' && !feof(fpin));
    oldstr = line;
    newstr = NULL;

    if (strlen(line) == MAXLINE) 
      errexit("\nBuffer for fgets not big enough!\n");

    if (readvw) {
      for (l=0; l<ncon; l++) {
        vwgt[i*ncon+l] = strtoidx(oldstr, &newstr, 10);
        oldstr = newstr;
      }
    }

    for (;;) {
      edge = strtoidx(oldstr, &newstr, 10) -1;
      oldstr = newstr;

      if (readew) {
        ewgt = strtoidx(oldstr, &newstr, 10);
        oldstr = newstr;
      }

      if (edge < 0)
        break;

      adjncy[k] = edge;
      if (readew) 
        adjwgt[k] = ewgt;
      k++;
    } 
    xadj[i+1] = k;
  }

  gk_fclose(fpin);

  if (k != graph->nedges) {
    mprintf("------------------------------------------------------------------------------\n");
    mprintf("***  I detected an error in your input file  ***\n\n");
    mprintf("In the first line of the file, you specified that the graph contained\n%D edges. However, I only found %D edges in the file.\n", graph->nedges/2, k/2);
    if (2*k == graph->nedges) {
      mprintf("\n *> I detected that you specified twice the number of edges that you have in\n");
      mprintf("    the file. Remember that the number of edges specified in the first line\n");
      mprintf("    counts each edge between vertices v and u only once.\n\n");
    }
    mprintf("Please specify the correct number of edges in the first line of the file.\n");
    mprintf("------------------------------------------------------------------------------\n");
    exit(0);
  }

  gk_free((void **)&line, LTERM);
}
Example #7
0
int main(int argc, char *argv[])
{
  ssize_t i, j, niter;
  params_t *params;
  gk_csr_t *mat;
  FILE *fpout;
 
  /* get command-line options */
  params = parse_cmdline(argc, argv);

  /* read the data */
  mat = gk_csr_Read(params->infile, GK_CSR_FMT_METIS, 1, 1);

  /* display some basic stats */
  print_init_info(params, mat);



  if (params->ntvs != -1) {
    /* compute the pr for different randomly generated restart-distribution vectors */
    float **prs;

    prs = gk_fAllocMatrix(params->ntvs, mat->nrows, 0.0, "main: prs");

    /* generate the random restart vectors */
    for (j=0; j<params->ntvs; j++) {
      for (i=0; i<mat->nrows; i++)
        prs[j][i] = RandomInRange(931);
      gk_fscale(mat->nrows, 1.0/gk_fsum(mat->nrows, prs[j], 1), prs[j], 1);

      niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, prs[j]);
      printf("tvs#: %zd; niters: %zd\n", j, niter);
    }

    /* output the computed pr scores */
    fpout = gk_fopen(params->outfile, "w", "main: outfile");
    for (i=0; i<mat->nrows; i++) {
      for (j=0; j<params->ntvs; j++) 
        fprintf(fpout, "%.4e ", prs[j][i]);
      fprintf(fpout, "\n");
    }
    gk_fclose(fpout);

    gk_fFreeMatrix(&prs, params->ntvs, mat->nrows);
  }
  else if (params->ppr != -1) {
    /* compute the personalized pr from the specified vertex */
    float *pr;

    pr = gk_fsmalloc(mat->nrows, 0.0, "main: pr");

    pr[params->ppr-1] = 1.0;

    niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr);
    printf("ppr: %d; niters: %zd\n", params->ppr, niter);

    /* output the computed pr scores */
    fpout = gk_fopen(params->outfile, "w", "main: outfile");
    for (i=0; i<mat->nrows; i++) 
      fprintf(fpout, "%.4e\n", pr[i]);
    gk_fclose(fpout);

    gk_free((void **)&pr, LTERM);
  }
  else {
    /* compute the standard pr */
    int jmax;
    float diff, maxdiff;
    float *pr;

    pr = gk_fsmalloc(mat->nrows, 1.0/mat->nrows, "main: pr");

    niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr);
    printf("pr; niters: %zd\n", niter);

    /* output the computed pr scores */
    fpout = gk_fopen(params->outfile, "w", "main: outfile");
    for (i=0; i<mat->nrows; i++) {
      for (jmax=i, maxdiff=0.0, j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) {
        if ((diff = fabs(pr[i]-pr[mat->rowind[j]])) > maxdiff) {
          maxdiff = diff;
          jmax = mat->rowind[j];
        }
      }
      fprintf(fpout, "%.4e %10zd %.4e %10d\n", pr[i], 
          mat->rowptr[i+1]-mat->rowptr[i], maxdiff, jmax+1);
    }
    gk_fclose(fpout);

    gk_free((void **)&pr, LTERM);
  }

  gk_csr_Free(&mat);

  /* display some final stats */
  print_final_info(params);
}
gk_graph_t *gk_graph_Read(char *filename, int format, int isfewgts, 
                int isfvwgts, int isfvsizes)
{
  ssize_t i, k, l;
  size_t nfields, nvtxs, nedges, fmt, ncon, lnlen;
  int32_t ival;
  float fval;
  int readsizes=0, readwgts=0, readvals=0, numbering=0;
  char *line=NULL, *head, *tail, fmtstr[256];
  FILE *fpin=NULL;
  gk_graph_t *graph=NULL;


  if (!gk_fexists(filename)) 
    gk_errexit(SIGERR, "File %s does not exist!\n", filename);

  if (format == GK_GRAPH_FMT_METIS) {
    fpin = gk_fopen(filename, "r", "gk_graph_Read: fpin");
    do {
      if (gk_getline(&line, &lnlen, fpin) <= 0)
        gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename);
    } while (line[0] == '%');

    fmt = ncon = 0;
    nfields = sscanf(line, "%zu %zu %zu %zu", &nvtxs, &nedges, &fmt, &ncon);
    if (nfields < 2)
      gk_errexit(SIGERR, "Header line must contain at least 2 integers (#vtxs and #edges).\n");

    nedges *= 2;

    if (fmt > 111)
      gk_errexit(SIGERR, "Cannot read this type of file format [fmt=%zu]!\n", fmt);

    sprintf(fmtstr, "%03zu", fmt%1000);
    readsizes = (fmtstr[0] == '1');
    readwgts  = (fmtstr[1] == '1');
    readvals  = (fmtstr[2] == '1');
    numbering = 1;
    ncon      = (ncon == 0 ? 1 : ncon);
  }
  else {
    gk_errexit(SIGERR, "Unrecognized format: %d\n", format);
  }

  graph = gk_graph_Create();

  graph->nvtxs = nvtxs;

  graph->xadj   = gk_zmalloc(nvtxs+1, "gk_graph_Read: xadj");
  graph->adjncy = gk_i32malloc(nedges, "gk_graph_Read: adjncy");
  if (readvals) {
    if (isfewgts)
      graph->fadjwgt = gk_fmalloc(nedges, "gk_graph_Read: fadjwgt");
    else
      graph->iadjwgt = gk_i32malloc(nedges, "gk_graph_Read: iadjwgt");
  }

  if (readsizes) {
    if (isfvsizes)
      graph->fvsizes = gk_fmalloc(nvtxs, "gk_graph_Read: fvsizes");
    else
      graph->ivsizes = gk_i32malloc(nvtxs, "gk_graph_Read: ivsizes");
  }

  if (readwgts) {
    if (isfvwgts)
      graph->fvwgts = gk_fmalloc(nvtxs*ncon, "gk_graph_Read: fvwgts");
    else
      graph->ivwgts = gk_i32malloc(nvtxs*ncon, "gk_graph_Read: ivwgts");
  }


  /*----------------------------------------------------------------------
   * Read the sparse graph file
   *---------------------------------------------------------------------*/
  numbering = (numbering ? - 1 : 0);
  for (graph->xadj[0]=0, k=0, i=0; i<nvtxs; i++) {
    do {
      if (gk_getline(&line, &lnlen, fpin) == -1)
        gk_errexit(SIGERR, "Pregraphure end of input file: file while reading row %d\n", i);
    } while (line[0] == '%');

    head = line;
    tail = NULL;

    /* Read vertex sizes */
    if (readsizes) {
      if (isfvsizes) {
#ifdef __MSC__
        graph->fvsizes[i] = (float)strtod(head, &tail);
#else
        graph->fvsizes[i] = strtof(head, &tail);
#endif
        if (tail == head)
          gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1);
        if (graph->fvsizes[i] < 0)
          gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1);
      }
      else {
        graph->ivsizes[i] = strtol(head, &tail, 0);
        if (tail == head)
          gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1);
        if (graph->ivsizes[i] < 0)
          gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1);
      }
      head = tail;
    }

    /* Read vertex weights */
    if (readwgts) {
      for (l=0; l<ncon; l++) {
        if (isfvwgts) {
#ifdef __MSC__
          graph->fvwgts[i*ncon+l] = (float)strtod(head, &tail);
#else
          graph->fvwgts[i*ncon+l] = strtof(head, &tail);
#endif
          if (tail == head)
            gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights "
                    "for the %d constraints.\n", i+1, ncon);
          if (graph->fvwgts[i*ncon+l] < 0)
            gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l);
        }
        else {
          graph->ivwgts[i*ncon+l] = strtol(head, &tail, 0);
          if (tail == head)
            gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights "
                    "for the %d constraints.\n", i+1, ncon);
          if (graph->ivwgts[i*ncon+l] < 0)
            gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l);
        }
        head = tail;
      }
    }

   
    /* Read the rest of the row */
    while (1) {
      ival = (int)strtol(head, &tail, 0);
      if (tail == head) 
        break;
      head = tail;
      
      if ((graph->adjncy[k] = ival + numbering) < 0)
        gk_errexit(SIGERR, "Error: Invalid column number %d at row %zd.\n", ival, i);

      if (readvals) {
        if (isfewgts) {
#ifdef __MSC__
          fval = (float)strtod(head, &tail);
#else
    	  fval = strtof(head, &tail);
#endif
          if (tail == head)
            gk_errexit(SIGERR, "Value could not be found for edge! Vertex:%zd, NNZ:%zd\n", i, k);

          graph->fadjwgt[k] = fval;
        }
        else {
    	  ival = strtol(head, &tail, 0);
          if (tail == head)
            gk_errexit(SIGERR, "Value could not be found for edge! Vertex:%zd, NNZ:%zd\n", i, k);

          graph->iadjwgt[k] = ival;
        }
        head = tail;
      }
      k++;
    }
    graph->xadj[i+1] = k;
  }

  if (k != nedges)
    gk_errexit(SIGERR, "gk_graph_Read: Something wrong with the number of edges in "
                       "the input file. nedges=%zd, Actualnedges=%zd.\n", nedges, k);

  gk_fclose(fpin);

  gk_free((void **)&line, LTERM);

  return graph;
}