/*
 * Builds a new graph that holds only the rows i of `graph` for which
 * part[i] == pid, kept in increasing order of i.
 *
 *   graph  source graph; its nrows, ncols, rowptr, rowind and rowval fields
 *          are read
 *   part   array indexed by row; entries 0..graph->nrows-1 are inspected
 *   pid    the partition id whose rows are extracted
 *
 * Returns the newly created graph. ncols is copied from the source and the
 * column indices/values of each kept row are copied unchanged.
 */
gk_graph_t *gk_graph_ExtractPartition(gk_graph_t *graph, int *part, int pid)
{
  ssize_t row, kept, filled;
  gk_graph_t *sub;

  sub = gk_graph_Create();
  sub->nrows = 0;
  sub->ncols = graph->ncols;

  /* Pass 1: count the selected rows and the entries they contain */
  filled = 0;
  for (row=0; row<graph->nrows; row++) {
    if (part[row] != pid)
      continue;

    sub->nrows++;
    filled += graph->rowptr[row+1]-graph->rowptr[row];
  }

  sub->rowptr = gk_zmalloc(sub->nrows+1, "gk_graph_ExtractPartition: rowptr");
  sub->rowind = gk_imalloc(filled, "gk_graph_ExtractPartition: rowind");
  sub->rowval = gk_fmalloc(filled, "gk_graph_ExtractPartition: rowval");

  /* Pass 2: append each selected row and record where it ends */
  sub->rowptr[0] = 0;
  kept   = 0;
  filled = 0;
  for (row=0; row<graph->nrows; row++) {
    if (part[row] != pid)
      continue;

    gk_icopy(graph->rowptr[row+1]-graph->rowptr[row],
             graph->rowind+graph->rowptr[row], sub->rowind+filled);
    gk_fcopy(graph->rowptr[row+1]-graph->rowptr[row],
             graph->rowval+graph->rowptr[row], sub->rowval+filled);

    filled += graph->rowptr[row+1]-graph->rowptr[row];
    kept++;
    sub->rowptr[kept] = filled;
  }
  ASSERT(kept == sub->nrows);

  return sub;
}
/*
 * Builds a new graph made of the rows of `graph` listed in rind, in the
 * order in which they are listed.
 *
 *   graph  source graph; its ncols, rowptr, rowind and rowval fields are read
 *   nrows  number of entries in rind (and number of rows of the result)
 *   rind   row indices into `graph`; they are used as given, without any
 *          range validation
 *
 * Returns the newly created graph. ncols is copied from the source and the
 * column indices/values of each selected row are copied unchanged.
 *
 * The allocation labels now name this function; they were previously
 * copy-pasted from gk_graph_ExtractPartition, which made allocation
 * diagnostics point at the wrong routine.
 */
gk_graph_t *gk_graph_ExtractRows(gk_graph_t *graph, int nrows, int *rind)
{
  ssize_t i, ii, nnz;
  gk_graph_t *ngraph;

  ngraph = gk_graph_Create();

  ngraph->nrows = nrows;
  ngraph->ncols = graph->ncols;

  /* total number of entries in the selected rows */
  for (nnz=0, i=0; i<nrows; i++)
    nnz += graph->rowptr[rind[i]+1]-graph->rowptr[rind[i]];

  ngraph->rowptr = gk_zmalloc(ngraph->nrows+1, "gk_graph_ExtractRows: rowptr");
  ngraph->rowind = gk_imalloc(nnz, "gk_graph_ExtractRows: rowind");
  ngraph->rowval = gk_fmalloc(nnz, "gk_graph_ExtractRows: rowval");

  /* copy the rows one after the other; output row ii ends at rowptr[ii+1] */
  ngraph->rowptr[0] = 0;
  for (nnz=0, ii=0; ii<nrows; ii++) {
    i = rind[ii];
    gk_icopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowind+graph->rowptr[i],
             ngraph->rowind+nnz);
    gk_fcopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowval+graph->rowptr[i],
             ngraph->rowval+nnz);
    nnz += graph->rowptr[i+1]-graph->rowptr[i];
    ngraph->rowptr[ii+1] = nnz;
  }

  return ngraph;
}
/*
 * Creates a new graph from the nvtxs consecutive vertices of `graph` that
 * start at vertex vstart.
 *
 *   graph   source graph
 *   vstart  index of the first vertex to extract
 *   nvtxs   number of vertices to extract
 *
 * Returns the new graph, or NULL when the requested range is negative or
 * does not fit inside graph->nvtxs.
 *
 * Only the arrays that are present (non-NULL) in the source are copied.
 * The xadj offsets are shifted so that they start at 0. The adjncy entries
 * are copied verbatim, i.e. they are NOT renumbered relative to vstart and
 * edges leaving the extracted range are not removed.
 *
 * The adjacency arrays are indexed through xadj, so they are copied only
 * when the source has an xadj array. Previously a source graph without xadj
 * caused a NULL dereference here.
 */
gk_graph_t *gk_graph_ExtractSubgraph(gk_graph_t *graph, int vstart, int nvtxs)
{
  ssize_t i, estart, nedges;
  gk_graph_t *ngraph;

  /* reject negative ranges; the subtraction form cannot overflow */
  if (vstart < 0 || nvtxs < 0)
    return NULL;
  if (vstart > graph->nvtxs || nvtxs > graph->nvtxs - vstart)
    return NULL;

  ngraph = gk_graph_Create();

  ngraph->nvtxs = nvtxs;

  /* copy the per-vertex arrays */
  if (graph->ivwgts)
    ngraph->ivwgts = gk_i32copy(nvtxs, graph->ivwgts+vstart,
                          gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: ivwgts"));
  if (graph->ivsizes)
    ngraph->ivsizes = gk_i32copy(nvtxs, graph->ivsizes+vstart,
                          gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: ivsizes"));
  if (graph->vlabels)
    ngraph->vlabels = gk_i32copy(nvtxs, graph->vlabels+vstart,
                          gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: vlabels"));
  if (graph->fvwgts)
    ngraph->fvwgts = gk_fcopy(nvtxs, graph->fvwgts+vstart,
                          gk_fmalloc(nvtxs, "gk_graph_ExtractSubgraph: fvwgts"));
  if (graph->fvsizes)
    ngraph->fvsizes = gk_fcopy(nvtxs, graph->fvsizes+vstart,
                          gk_fmalloc(nvtxs, "gk_graph_ExtractSubgraph: fvsizes"));

  /* copy the adjacency structure */
  if (graph->xadj) {
    estart = graph->xadj[vstart];
    nedges = graph->xadj[vstart+nvtxs] - estart;

    ngraph->xadj = gk_zcopy(nvtxs+1, graph->xadj+vstart,
                        gk_zmalloc(nvtxs+1, "gk_graph_ExtractSubgraph: xadj"));

    /* rebase the offsets so that the first extracted vertex starts at 0 */
    for (i=0; i<=nvtxs; i++)
      ngraph->xadj[i] -= estart;
    ASSERT(ngraph->xadj[0] == 0);
    ASSERT(ngraph->xadj[nvtxs] == nedges);

    if (graph->adjncy)
      ngraph->adjncy = gk_i32copy(nedges, graph->adjncy+estart,
                            gk_i32malloc(nedges, "gk_graph_ExtractSubgraph: adjncy"));
    if (graph->iadjwgt)
      ngraph->iadjwgt = gk_i32copy(nedges, graph->iadjwgt+estart,
                            gk_i32malloc(nedges, "gk_graph_ExtractSubgraph: iadjwgt"));
    if (graph->fadjwgt)
      ngraph->fadjwgt = gk_fcopy(nedges, graph->fadjwgt+estart,
                            gk_fmalloc(nedges, "gk_graph_ExtractSubgraph: fadjwgt"));
  }

  return ngraph;
}
void gk_find_frequent_itemsets(int ntrans, int *tranptr, int *tranind, int minfreq, int maxfreq, int minlen, int maxlen, void (*process_itemset)(void *stateptr, int nitems, int *itemids, int ntrans, int *transids), void *stateptr) { ssize_t i; gk_csr_t *mat, *pmat; isparams_t params; int *pattern; /* Create the matrix */ mat = gk_csr_Create(); mat->nrows = ntrans; mat->ncols = tranind[gk_iargmax(tranptr[ntrans], tranind)]+1; mat->rowptr = gk_zmalloc(ntrans+1, "gk_find_frequent_itemsets: mat.rowptr"); for (i=0; i<ntrans+1; i++) mat->rowptr[i] = tranptr[i]; mat->rowind = gk_icopy(tranptr[ntrans], tranind, gk_imalloc(tranptr[ntrans], "gk_find_frequent_itemsets: mat.rowind")); mat->colids = gk_iincset(mat->ncols, 0, gk_imalloc(mat->ncols, "gk_find_frequent_itemsets: mat.colids")); /* Setup the parameters */ params.minfreq = minfreq; params.maxfreq = (maxfreq == -1 ? mat->nrows : maxfreq); params.minlen = minlen; params.maxlen = (maxlen == -1 ? mat->ncols : maxlen); params.tnitems = mat->ncols; params.callback = process_itemset; params.stateptr = stateptr; params.rmarker = gk_ismalloc(mat->nrows, 0, "gk_find_frequent_itemsets: rmarker"); params.cand = gk_ikvmalloc(mat->ncols, "gk_find_frequent_itemsets: cand"); /* Perform the initial projection */ gk_csr_CreateIndex(mat, GK_CSR_COL); pmat = itemsets_project_matrix(¶ms, mat, -1); gk_csr_Free(&mat); pattern = gk_imalloc(pmat->ncols, "gk_find_frequent_itemsets: pattern"); itemsets_find_frequent_itemsets(¶ms, pmat, 0, pattern); gk_csr_Free(&pmat); gk_free((void **)&pattern, ¶ms.rmarker, ¶ms.cand, LTERM); }
/*
 * Returns a newly created graph that duplicates `graph`.
 *
 * nvtxs is copied, and every array that is present (non-NULL) in the source
 * is copied into freshly allocated storage: xadj gets nvtxs+1 entries, the
 * per-vertex arrays get nvtxs entries, and the per-edge arrays get
 * xadj[nvtxs] entries. Arrays that are absent in the source are left as
 * gk_graph_Create() set them.
 */
gk_graph_t *gk_graph_Dup(gk_graph_t *graph)
{
  gk_graph_t *copy;

  copy = gk_graph_Create();
  copy->nvtxs = graph->nvtxs;

  /* adjacency offsets */
  if (graph->xadj != NULL) {
    copy->xadj = gk_zcopy(graph->nvtxs+1, graph->xadj,
                     gk_zmalloc(graph->nvtxs+1, "gk_graph_Dup: xadj"));
  }

  /* per-vertex data */
  if (graph->ivwgts != NULL) {
    copy->ivwgts = gk_i32copy(graph->nvtxs, graph->ivwgts,
                       gk_i32malloc(graph->nvtxs, "gk_graph_Dup: ivwgts"));
  }
  if (graph->ivsizes != NULL) {
    copy->ivsizes = gk_i32copy(graph->nvtxs, graph->ivsizes,
                        gk_i32malloc(graph->nvtxs, "gk_graph_Dup: ivsizes"));
  }
  if (graph->vlabels != NULL) {
    copy->vlabels = gk_i32copy(graph->nvtxs, graph->vlabels,
                        gk_i32malloc(graph->nvtxs, "gk_graph_Dup: ivlabels"));
  }
  if (graph->fvwgts != NULL) {
    copy->fvwgts = gk_fcopy(graph->nvtxs, graph->fvwgts,
                       gk_fmalloc(graph->nvtxs, "gk_graph_Dup: fvwgts"));
  }
  if (graph->fvsizes != NULL) {
    copy->fvsizes = gk_fcopy(graph->nvtxs, graph->fvsizes,
                        gk_fmalloc(graph->nvtxs, "gk_graph_Dup: fvsizes"));
  }

  /* per-edge data; the edge count is the last xadj offset */
  if (graph->adjncy != NULL) {
    copy->adjncy = gk_i32copy(graph->xadj[graph->nvtxs], graph->adjncy,
                       gk_i32malloc(graph->xadj[graph->nvtxs], "gk_graph_Dup: adjncy"));
  }
  if (graph->iadjwgt != NULL) {
    copy->iadjwgt = gk_i32copy(graph->xadj[graph->nvtxs], graph->iadjwgt,
                        gk_i32malloc(graph->xadj[graph->nvtxs], "gk_graph_Dup: iadjwgt"));
  }
  if (graph->fadjwgt != NULL) {
    copy->fadjwgt = gk_fcopy(graph->xadj[graph->nvtxs], graph->fadjwgt,
                        gk_fmalloc(graph->xadj[graph->nvtxs], "gk_graph_Dup: fadjwgt"));
  }

  return copy;
}
/*
 * Writes `value` into the rmarker entry of every row kept by a projection on
 * column cid: all nrows entries when cid == -1, otherwise only the rows that
 * are listed in column cid.
 */
static void itemsets_flag_projected_rows(int nrows, ssize_t *colptr, int *colind,
                int *rmarker, int cid, int value)
{
  ssize_t pos;

  if (cid == -1) {
    gk_iset(nrows, value, rmarker);
    return;
  }

  for (pos=colptr[cid]; pos<colptr[cid+1]; pos++)
    rmarker[colind[pos]] = value;
}


/*
 * Projects the column-indexed matrix `mat` on column cid and returns the
 * projection as a new matrix that has only the column-based fields
 * (colptr, colind, colids) populated.
 *
 *   params  supplies the scratch arrays rmarker and cand and the
 *           minfreq/maxfreq bounds
 *   mat     source matrix; nrows, ncols, colptr, colind and colids are read
 *   cid     column to project on; -1 requests the initial projection, in
 *           which every row is kept
 *
 * Only the columns after cid are considered. A column survives when the
 * number of its entries that fall in kept rows lies in [minfreq, maxfreq].
 * The surviving columns are ordered with gk_ikvsorti (presumably ascending
 * by that count -- confirm) and restricted to the kept rows. The rmarker
 * entries touched here are reset to 0 before returning.
 */
gk_csr_t *itemsets_project_matrix(isparams_t *params, gk_csr_t *mat, int cid)
{
  ssize_t col, pos, support, slot, filled;
  int nrows, ncols, pnrows, nkept;
  ssize_t *colptr, *kept_ptr;
  int *colind, *colids, *kept_ind, *kept_ids, *rmarker;
  gk_csr_t *pmat;
  gk_ikv_t *cand;

  nrows   = mat->nrows;
  ncols   = mat->ncols;
  colptr  = mat->colptr;
  colind  = mat->colind;
  colids  = mat->colids;
  rmarker = params->rmarker;
  cand    = params->cand;

  /* The row count of the projection is known up front */
  pmat   = gk_csr_Create();
  pnrows = (cid == -1 ? nrows : colptr[cid+1]-colptr[cid]);
  pmat->nrows = pnrows;

  /* Flag the rows that survive the projection */
  itemsets_flag_projected_rows(nrows, colptr, colind, rmarker, cid, 1);

  /* Collect the later columns whose restricted length is within bounds */
  nkept  = 0;
  filled = 0;
  for (col=cid+1; col<ncols; col++) {
    support = 0;
    for (pos=colptr[col]; pos<colptr[col+1]; pos++)
      support += rmarker[colind[pos]];

    if (support < params->minfreq || support > params->maxfreq)
      continue;

    cand[nkept].val = col;
    cand[nkept].key = support;
    nkept++;
    filled += support;
  }

  /* Order the surviving columns */
  gk_ikvsorti(nkept, cand);

  /* Now that the sizes are known, allocate the remaining fields */
  pmat->ncols  = nkept;
  pmat->colids = kept_ids = gk_imalloc(nkept, "itemsets_project_matrix: pcolids");
  pmat->colptr = kept_ptr = gk_zmalloc(nkept+1, "itemsets_project_matrix: pcolptr");
  pmat->colind = kept_ind = gk_imalloc(filled, "itemsets_project_matrix: pcolind");

  /* Fill the projection, keeping only the entries in flagged rows */
  kept_ptr[0] = 0;
  filled = 0;
  for (slot=0; slot<nkept; slot++) {
    col = cand[slot].val;
    for (pos=colptr[col]; pos<colptr[col+1]; pos++) {
      if (rmarker[colind[pos]]) {
        kept_ind[filled] = colind[pos];
        filled++;
      }
    }
    kept_ids[slot]   = colids[col];
    kept_ptr[slot+1] = filled;
  }

  /* Leave rmarker cleared for the next projection */
  itemsets_flag_projected_rows(nrows, colptr, colind, rmarker, cid, 0);

  return pmat;
}
gk_graph_t *gk_graph_Read(char *filename, int format, int isfewgts, int isfvwgts, int isfvsizes) { ssize_t i, k, l; size_t nfields, nvtxs, nedges, fmt, ncon, lnlen; int32_t ival; float fval; int readsizes=0, readwgts=0, readvals=0, numbering=0; char *line=NULL, *head, *tail, fmtstr[256]; FILE *fpin=NULL; gk_graph_t *graph=NULL; if (!gk_fexists(filename)) gk_errexit(SIGERR, "File %s does not exist!\n", filename); if (format == GK_GRAPH_FMT_METIS) { fpin = gk_fopen(filename, "r", "gk_graph_Read: fpin"); do { if (gk_getline(&line, &lnlen, fpin) <= 0) gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename); } while (line[0] == '%'); fmt = ncon = 0; nfields = sscanf(line, "%zu %zu %zu %zu", &nvtxs, &nedges, &fmt, &ncon); if (nfields < 2) gk_errexit(SIGERR, "Header line must contain at least 2 integers (#vtxs and #edges).\n"); nedges *= 2; if (fmt > 111) gk_errexit(SIGERR, "Cannot read this type of file format [fmt=%zu]!\n", fmt); sprintf(fmtstr, "%03zu", fmt%1000); readsizes = (fmtstr[0] == '1'); readwgts = (fmtstr[1] == '1'); readvals = (fmtstr[2] == '1'); numbering = 1; ncon = (ncon == 0 ? 
1 : ncon); } else { gk_errexit(SIGERR, "Unrecognized format: %d\n", format); } graph = gk_graph_Create(); graph->nvtxs = nvtxs; graph->xadj = gk_zmalloc(nvtxs+1, "gk_graph_Read: xadj"); graph->adjncy = gk_i32malloc(nedges, "gk_graph_Read: adjncy"); if (readvals) { if (isfewgts) graph->fadjwgt = gk_fmalloc(nedges, "gk_graph_Read: fadjwgt"); else graph->iadjwgt = gk_i32malloc(nedges, "gk_graph_Read: iadjwgt"); } if (readsizes) { if (isfvsizes) graph->fvsizes = gk_fmalloc(nvtxs, "gk_graph_Read: fvsizes"); else graph->ivsizes = gk_i32malloc(nvtxs, "gk_graph_Read: ivsizes"); } if (readwgts) { if (isfvwgts) graph->fvwgts = gk_fmalloc(nvtxs*ncon, "gk_graph_Read: fvwgts"); else graph->ivwgts = gk_i32malloc(nvtxs*ncon, "gk_graph_Read: ivwgts"); } /*---------------------------------------------------------------------- * Read the sparse graph file *---------------------------------------------------------------------*/ numbering = (numbering ? - 1 : 0); for (graph->xadj[0]=0, k=0, i=0; i<nvtxs; i++) { do { if (gk_getline(&line, &lnlen, fpin) == -1) gk_errexit(SIGERR, "Pregraphure end of input file: file while reading row %d\n", i); } while (line[0] == '%'); head = line; tail = NULL; /* Read vertex sizes */ if (readsizes) { if (isfvsizes) { #ifdef __MSC__ graph->fvsizes[i] = (float)strtod(head, &tail); #else graph->fvsizes[i] = strtof(head, &tail); #endif if (tail == head) gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1); if (graph->fvsizes[i] < 0) gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1); } else { graph->ivsizes[i] = strtol(head, &tail, 0); if (tail == head) gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1); if (graph->ivsizes[i] < 0) gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1); } head = tail; } /* Read vertex weights */ if (readwgts) { for (l=0; l<ncon; l++) { if (isfvwgts) { #ifdef __MSC__ graph->fvwgts[i*ncon+l] = (float)strtod(head, &tail); #else 
graph->fvwgts[i*ncon+l] = strtof(head, &tail); #endif if (tail == head) gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights " "for the %d constraints.\n", i+1, ncon); if (graph->fvwgts[i*ncon+l] < 0) gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l); } else { graph->ivwgts[i*ncon+l] = strtol(head, &tail, 0); if (tail == head) gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights " "for the %d constraints.\n", i+1, ncon); if (graph->ivwgts[i*ncon+l] < 0) gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l); } head = tail; } } /* Read the rest of the row */ while (1) { ival = (int)strtol(head, &tail, 0); if (tail == head) break; head = tail; if ((graph->adjncy[k] = ival + numbering) < 0) gk_errexit(SIGERR, "Error: Invalid column number %d at row %zd.\n", ival, i); if (readvals) { if (isfewgts) { #ifdef __MSC__ fval = (float)strtod(head, &tail); #else fval = strtof(head, &tail); #endif if (tail == head) gk_errexit(SIGERR, "Value could not be found for edge! Vertex:%zd, NNZ:%zd\n", i, k); graph->fadjwgt[k] = fval; } else { ival = strtol(head, &tail, 0); if (tail == head) gk_errexit(SIGERR, "Value could not be found for edge! Vertex:%zd, NNZ:%zd\n", i, k); graph->iadjwgt[k] = ival; } head = tail; } k++; } graph->xadj[i+1] = k; } if (k != nedges) gk_errexit(SIGERR, "gk_graph_Read: Something wrong with the number of edges in " "the input file. nedges=%zd, Actualnedges=%zd.\n", nedges, k); gk_fclose(fpin); gk_free((void **)&line, LTERM); return graph; }
/*
 * Returns a new graph whose vertices are those of `graph` renumbered
 * according to a permutation.
 *
 *   graph  source graph
 *   perm   perm[old] gives the new number of vertex `old`; may be NULL
 *   iperm  iperm[new] gives the old number of vertex `new`; may be NULL
 *
 * At least one of perm/iperm must be given; when both are NULL the function
 * returns NULL. The missing one is derived from the other into temporary
 * storage that is released before returning.
 *
 * Only the arrays that are present in the source are allocated in the
 * result. New vertex v receives the adjacency list of old vertex iperm[v]
 * with every neighbor translated through perm; edge weights follow their
 * edges and the per-vertex arrays follow their vertices.
 */
gk_graph_t *gk_graph_Reorder(gk_graph_t *graph, int32_t *perm, int32_t *iperm)
{
  ssize_t e, out, *xadj;
  int idx, oldv, newv, nvtxs;
  int owns_perm=0, owns_iperm=0;
  int32_t *adjncy;
  gk_graph_t *ngraph;

  if (perm == NULL && iperm == NULL)
    return NULL;

  ngraph = gk_graph_Create();

  nvtxs = graph->nvtxs;
  ngraph->nvtxs = nvtxs;

  xadj   = graph->xadj;
  adjncy = graph->adjncy;

  /* mirror the set of arrays that exist in the source graph */
  if (graph->xadj != NULL)
    ngraph->xadj = gk_zmalloc(nvtxs+1, "gk_graph_Reorder: xadj");

  if (graph->ivwgts != NULL)
    ngraph->ivwgts = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivwgts");

  if (graph->ivsizes != NULL)
    ngraph->ivsizes = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivsizes");

  if (graph->vlabels != NULL)
    ngraph->vlabels = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivlabels");

  if (graph->fvwgts != NULL)
    ngraph->fvwgts = gk_fmalloc(nvtxs, "gk_graph_Reorder: fvwgts");

  if (graph->fvsizes != NULL)
    ngraph->fvsizes = gk_fmalloc(nvtxs, "gk_graph_Reorder: fvsizes");

  if (graph->adjncy != NULL)
    ngraph->adjncy = gk_i32malloc(graph->xadj[nvtxs], "gk_graph_Reorder: adjncy");

  if (graph->iadjwgt != NULL)
    ngraph->iadjwgt = gk_i32malloc(graph->xadj[nvtxs], "gk_graph_Reorder: iadjwgt");

  if (graph->fadjwgt != NULL)
    ngraph->fadjwgt = gk_fmalloc(graph->xadj[nvtxs], "gk_graph_Reorder: fadjwgt");

  /* derive whichever direction of the permutation was not supplied */
  if (perm == NULL) {
    owns_perm = 1;
    perm = gk_i32malloc(nvtxs, "gk_graph_Reorder: perm");
    for (idx=0; idx<nvtxs; idx++)
      perm[iperm[idx]] = idx;
  }
  if (iperm == NULL) {
    owns_iperm = 1;
    iperm = gk_i32malloc(nvtxs, "gk_graph_Reorder: iperm");
    for (idx=0; idx<nvtxs; idx++)
      iperm[perm[idx]] = idx;
  }

  /* emit the vertices in their new order */
  out = 0;
  ngraph->xadj[0] = out;
  for (newv=0; newv<nvtxs; newv++) {
    oldv = iperm[newv];

    /* adjacency list of the old vertex, neighbors renumbered */
    for (e=xadj[oldv]; e<xadj[oldv+1]; e++) {
      ngraph->adjncy[out] = perm[adjncy[e]];
      if (graph->iadjwgt != NULL)
        ngraph->iadjwgt[out] = graph->iadjwgt[e];
      if (graph->fadjwgt != NULL)
        ngraph->fadjwgt[out] = graph->fadjwgt[e];
      out++;
    }

    /* per-vertex attributes move with the vertex */
    if (graph->ivwgts != NULL)
      ngraph->ivwgts[newv] = graph->ivwgts[oldv];
    if (graph->fvwgts != NULL)
      ngraph->fvwgts[newv] = graph->fvwgts[oldv];
    if (graph->ivsizes != NULL)
      ngraph->ivsizes[newv] = graph->ivsizes[oldv];
    if (graph->fvsizes != NULL)
      ngraph->fvsizes[newv] = graph->fvsizes[oldv];
    if (graph->vlabels != NULL)
      ngraph->vlabels[newv] = graph->vlabels[oldv];

    ngraph->xadj[newv+1] = out;
  }

  /* release the permutation array that was created here, if any */
  if (owns_perm)
    gk_free((void **)&perm, LTERM);
  if (owns_iperm)
    gk_free((void **)&iperm, LTERM);

  return ngraph;
}
/*
 * Returns a new graph with the same nrows/ncols as `graph` from which some
 * entries have been dropped.
 *
 *   graph  source graph; nrows, ncols, rowptr, rowind and rowval are read
 *   what   GK_CSR_COL: keep only the entries of columns whose number of
 *                      entries lies in [minf, maxf]
 *          GK_CSR_ROW: keep only the rows whose number of entries lies in
 *                      [minf, maxf]; every other row becomes empty
 *   minf   lower bound (inclusive) on the column/row length
 *   maxf   upper bound (inclusive) on the column/row length
 *
 * The output arrays are sized for the full rowptr[nrows] entries of the
 * source. For any other value of `what` the partially built graph is freed,
 * the problem is reported through gk_errexit and NULL is returned.
 */
gk_graph_t *gk_graph_Prune(gk_graph_t *graph, int what, int minf, int maxf)
{
  ssize_t r, e, c, out, len;
  int nrows, ncols;
  ssize_t *rowptr, *nrowptr;
  int *rowind, *nrowind, *keepcol;
  float *rowval, *nrowval;
  gk_graph_t *ngraph;

  ngraph = gk_graph_Create();

  ngraph->nrows = graph->nrows;
  nrows = ngraph->nrows;
  ngraph->ncols = graph->ncols;
  ncols = ngraph->ncols;

  rowptr = graph->rowptr;
  rowind = graph->rowind;
  rowval = graph->rowval;

  nrowptr = ngraph->rowptr = gk_zmalloc(nrows+1, "gk_graph_Prune: nrowptr");
  nrowind = ngraph->rowind = gk_imalloc(rowptr[nrows], "gk_graph_Prune: nrowind");
  nrowval = ngraph->rowval = gk_fmalloc(rowptr[nrows], "gk_graph_Prune: nrowval");

  if (what == GK_CSR_COL) {
    /* count the entries of every column */
    keepcol = gk_ismalloc(ncols, 0, "gk_graph_Prune: collen");
    for (r=0; r<nrows; r++) {
      for (e=rowptr[r]; e<rowptr[r+1]; e++) {
        ASSERT(rowind[e] < ncols);
        keepcol[rowind[e]]++;
      }
    }

    /* turn each count into a keep (1) / drop (0) flag */
    for (c=0; c<ncols; c++) {
      if (keepcol[c] >= minf && keepcol[c] <= maxf)
        keepcol[c] = 1;
      else
        keepcol[c] = 0;
    }

    /* copy only the entries that fall in kept columns */
    nrowptr[0] = 0;
    out = 0;
    for (r=0; r<nrows; r++) {
      for (e=rowptr[r]; e<rowptr[r+1]; e++) {
        if (!keepcol[rowind[e]])
          continue;
        nrowind[out] = rowind[e];
        nrowval[out] = rowval[e];
        out++;
      }
      nrowptr[r+1] = out;
    }

    gk_free((void **)&keepcol, LTERM);
  }
  else if (what == GK_CSR_ROW) {
    /* copy whole rows whose length is within bounds; others stay empty */
    nrowptr[0] = 0;
    out = 0;
    for (r=0; r<nrows; r++) {
      len = rowptr[r+1]-rowptr[r];
      if (len >= minf && len <= maxf) {
        for (e=rowptr[r]; e<rowptr[r+1]; e++) {
          nrowind[out] = rowind[e];
          nrowval[out] = rowval[e];
          out++;
        }
      }
      nrowptr[r+1] = out;
    }
  }
  else {
    gk_graph_Free(&ngraph);
    gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
    return NULL;
  }

  return ngraph;
}