/*
 * Refines a 2-way partition level by level, starting at the graph passed in
 * `graph` and following the `finer` links until `orggraph` has been refined.
 *
 *   ctrl     - run control structure (debug level, timers, niter)
 *   orggraph - the graph at which the walk stops
 *   graph    - the graph at which the walk starts
 *   tpwgts   - target partition weights handed to the balancing and
 *              refinement routines
 *
 * At every level the partition is balanced and FM-refined; unless that level
 * is `orggraph`, the partition is then projected onto the next finer graph.
 */
void Refine2Way(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph, real_t *tpwgts)
{
  int reached_original = 0;

  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startwctimer(ctrl->UncoarsenTmr));

  /* The starting graph needs its partition parameters computed once;
     projected levels are handled by Project2WayPartition below. */
  Compute2WayPartitionParams(ctrl, graph);

  while (!reached_original) {
    ASSERT(CheckBnd(graph));

    /* Balance + refine the current level (timed as a single unit). */
    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startwctimer(ctrl->RefTmr));
    Balance2Way(ctrl, graph, tpwgts);
    FM_2WayRefine(ctrl, graph, tpwgts, ctrl->niter);
    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopwctimer(ctrl->RefTmr));

    reached_original = (graph == orggraph);

    if (!reached_original) {
      /* Step to the next finer graph and carry the partition over. */
      graph = graph->finer;

      IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startwctimer(ctrl->ProjectTmr));
      Project2WayPartition(ctrl, graph);
      IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopwctimer(ctrl->ProjectTmr));
    }
  }

  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopwctimer(ctrl->UncoarsenTmr));
}
int main(int argc, char *argv[]) { params_t par; double timer_scan; gk_clearwctimer(par.timer_global); gk_clearwctimer(par.timer_1); gk_clearwctimer(par.timer_2); gk_clearwctimer(par.timer_3); gk_clearwctimer(par.timer_4); gk_startwctimer(par.timer_global); int k; printf("\nScan - OMP_Scan\n"); gk_startwctimer(par.timer_4); cmd_parse(argc, argv, &par); gk_stopwctimer(par.timer_4); memcpy(par.a,par.b,(par.nalloc + 1)*sizeof(int)); OMP_Scan(&par); timer_scan=par.timer_2; for (k = 0; k < EXATRA; ++k) { memcpy(par.a,par.b,(par.nalloc + 1)*sizeof(int)); OMP_Scan(&par); timer_scan=MIN(timer_scan,par.timer_2); } par.timer_2=timer_scan; for (k = 0; k < EXATRA; ++k) { Scan_Serial_Seq(&par); } WriteOut(&par); for (k = 0; k < EXATRA; ++k) { OMP_Sscan(&par); } gk_stopwctimer(par.timer_global); printf(" wclock (sec): \t%.8lf\n", gk_getwctimer(par.timer_global)); printf(" timer4 Init (sec): \t%.8lf\n", gk_getwctimer(par.timer_4)); printf(" timer3 Serial (sec) on %d runs: \t%.8lf\n", EXATRA, gk_getwctimer(par.timer_3) ); printf(" timer2 Scan (sec) on %d runs: \t%.8lf\n", EXATRA, gk_getwctimer(par.timer_2) ); printf(" timer1 Sscan (sec) on %d runs: \t%.8lf\n", EXATRA, gk_getwctimer(par.timer_1) ); cleanup(&par); return 0; }
int main(int argc, char *argv[]) { params_t params; int rc = EXIT_SUCCESS; cmdline_parse(¶ms, argc, argv); printf("********************************************************************************\n"); printf("sd (%d.%d.%d) Copyright 2011, GK.\n", VER_MAJOR, VER_MINOR, VER_SUBMINOR); printf(" nnbrs=%d, minsim=%.2f\n", params.nnbrs, params.minsim); gk_clearwctimer(params.timer_global); gk_clearwctimer(params.timer_1); gk_clearwctimer(params.timer_2); gk_clearwctimer(params.timer_3); gk_clearwctimer(params.timer_4); gk_startwctimer(params.timer_global); ComputeNeighbors(¶ms); gk_stopwctimer(params.timer_global); printf(" wclock: %.2lfs\n", gk_getwctimer(params.timer_global)); printf(" timer1: %.2lfs\n", gk_getwctimer(params.timer_1)); printf(" timer2: %.2lfs\n", gk_getwctimer(params.timer_2)); printf(" timer3: %.2lfs\n", gk_getwctimer(params.timer_3)); printf(" timer4: %.2lfs\n", gk_getwctimer(params.timer_4)); printf("********************************************************************************\n"); exit(rc); }
/*
 * OpenMP scan over par->a that honours the flag arrays par->f / par->fb.
 * The structure (up-sweep, clear the last element, down-sweep, then add the
 * original input) looks like a work-efficient segmented scan -- confirm
 * against the caller's expectations.
 *
 * Reads:  par->nthreads, par->nlevels, par->nalloc, par->nlines,
 *         par->b (added back in the last step), par->fb (read-only flags)
 * Writes: par->a (result, in place), par->f (modified by both sweeps),
 *         par->c (set to alias par->a), par->timer_1 (covers all sweeps)
 *
 * NOTE(review): the index arithmetic assumes par->nalloc and par->nlevels
 * are consistent (presumably nalloc == 2^nlevels); verify in cmd_parse.
 */
void OMP_Sscan(params_t *par)
{
  int d, k;
  int levelstep2d = 1, levelstep2d1;

  omp_set_num_threads(par->nthreads);

  gk_startwctimer(par->timer_1);

  /* ****************** UP SWEEP ****************************** */
  for (d = 1; d < par->nlevels; ++d) {
    levelstep2d1 = levelstep2d * 2;

    #pragma omp parallel for shared(par, levelstep2d, levelstep2d1) schedule(static)
    for (k = 0; k < par->nalloc; k = k + levelstep2d1) {
      /* Last element of the left and right half of this block. */
      const int left = k + levelstep2d - 1;
      const int right = k + levelstep2d1 - 1;

      if (!par->f[right])
        par->a[right] = par->a[left] + par->a[right];
      par->f[right] = par->f[left] | par->f[right];
    }

    levelstep2d = levelstep2d * 2;
  }

  /* ****************** DOWN SWEEP ****************************** */
  par->a[par->nalloc - 1] = 0;
  levelstep2d = par->nalloc / 2;

  for (d = par->nlevels - 1; d >= 0; --d) {
    levelstep2d1 = levelstep2d * 2;

    #pragma omp parallel for shared(par, levelstep2d, levelstep2d1) schedule(static)
    for (k = 0; k < par->nalloc; k = k + levelstep2d1) {
      const int left = k + levelstep2d - 1;
      const int right = k + levelstep2d1 - 1;
      /* The temporary must be private to each iteration: a function-scope
         variable would be shared by all threads of the parallel region and
         raced on. */
      const int t = par->a[left];

      par->a[left] = par->a[right];
      if (par->fb[left + 1]) {
        par->a[right] = 0;
      } else if (par->f[left]) {
        par->a[right] = t;
      } else {
        par->a[right] = t + par->a[right];
      }
      par->f[left] = 0;
    }

    levelstep2d = levelstep2d / 2;
  }

  /* Add the original input back onto the down-sweep result. */
  #pragma omp parallel for shared(par) schedule(static)
  for (k = 0; k < par->nlines; k = k + 1) {
    par->a[k] = par->b[k] + par->a[k];
  }

  gk_stopwctimer(par->timer_1);

  par->c = par->a;
}
/*
 * Serial reference implementation: stores the inclusive prefix sum of
 * par->b[0 .. nlines-1] in a freshly allocated par->d.
 *
 * The elapsed time (allocation included) is accumulated into par->timer_3.
 *
 * If par->nlines <= 0 there is nothing to scan and par->d is set to NULL.
 * If the allocation fails, a message is printed to stderr and the process
 * exits with EXIT_FAILURE.
 *
 * NOTE(review): every call allocates a new buffer and overwrites par->d
 * without releasing the previous one. The previous pointer is not freed here
 * because it cannot be shown from this function that par->d is initialised
 * before the first call -- confirm in cmd_parse/cleanup and free it there or
 * here as appropriate.
 */
void Scan_Serial_Seq(params_t *par)
{
  int i;

  gk_startwctimer(par->timer_3);

  /* Empty input: avoid touching d[0]/b[0], which would be out of bounds. */
  if (par->nlines <= 0) {
    par->d = NULL;
    gk_stopwctimer(par->timer_3);
    return;
  }

  par->d = malloc((size_t)par->nlines * sizeof *par->d);
  if (par->d == NULL) {
    fprintf(stderr, "Scan_Serial_Seq: failed to allocate %d elements\n",
            par->nlines);
    exit(EXIT_FAILURE);
  }

  par->d[0] = par->b[0];
  for (i = 1; i < par->nlines; ++i) {
    par->d[i] = par->d[i - 1] + par->b[i];
  }

  gk_stopwctimer(par->timer_3);
}
int METIS_PartGraphKway(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *objval, idx_t *part) { int sigrval=0, renumber=0; graph_t *graph; ctrl_t *ctrl; /* set up malloc cleaning code and signal catchers */ if (!gk_malloc_init()) return METIS_ERROR_MEMORY; gk_sigtrap(); if ((sigrval = gk_sigcatch()) != 0) goto SIGTHROW; /* set up the run parameters */ ctrl = SetupCtrl(METIS_OP_KMETIS, options, *ncon, *nparts, tpwgts, ubvec); if (!ctrl) { gk_siguntrap(); return METIS_ERROR_INPUT; } /* if required, change the numbering to 0 */ if (ctrl->numflag == 1) { Change2CNumbering(*nvtxs, xadj, adjncy); renumber = 1; } /* set up the graph */ graph = SetupGraph(ctrl, *nvtxs, *ncon, xadj, adjncy, vwgt, vsize, adjwgt); /* set up multipliers for making balance computations easier */ SetupKWayBalMultipliers(ctrl, graph); /* set various run parameters that depend on the graph */ if (ctrl->iptype == METIS_IPTYPE_METISRB) { ctrl->CoarsenTo = gk_max((*nvtxs)/(40*gk_log2(*nparts)), 30*(*nparts)); ctrl->CoarsenTo = 10*(*nparts); ctrl->nIparts = (ctrl->CoarsenTo == 30*(*nparts) ? 
4 : 5); } else { ctrl->CoarsenTo = 10*(*nparts); ctrl->nIparts = 10; } /* take care contiguity requests for disconnected graphs */ if (ctrl->contig && !IsConnected(graph, 0)) gk_errexit(SIGERR, "METIS Error: A contiguous partition is requested for a non-contiguous input graph.\n"); /* allocate workspace memory */ AllocateWorkSpace(ctrl, graph); /* start the partitioning */ IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl)); IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startwctimer(ctrl->TotalTmr)); *objval = MlevelKWayPartitioning(ctrl, graph, part); IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopwctimer(ctrl->TotalTmr)); IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl)); /* clean up */ FreeCtrl(&ctrl); SIGTHROW: /* if required, change the numbering back to 1 */ if (renumber) Change2FNumbering(*nvtxs, xadj, adjncy, part); gk_siguntrap(); gk_malloc_cleanup(0); return metis_rcode(sigrval); }
/*
 * Runs the multilevel k-way pipeline ctrl->ncuts times and keeps the best
 * partition found.
 *
 * Each trial coarsens `graph`, computes an initial partition on the coarsest
 * graph (method selected by ctrl->iptype), refines it back up with
 * RefineKWay, and scores it by ctrl->objtype (edge cut or volume) together
 * with its load-imbalance value. The winning `where` vector is copied into
 * `part`.
 *
 * `graph` is freed before returning. Returns the objective value of the
 * partition stored in `part`.
 */
idx_t MlevelKWayPartitioning(ctrl_t *ctrl, graph_t *graph, idx_t *part)
{
  idx_t trial, objval=0, trial_obj=0, best_obj=0;
  real_t trial_bal=0.0, best_bal=0.0;
  graph_t *coarsest;
  int accept;

  trial = 0;
  while (trial < ctrl->ncuts) {
    coarsest = CoarsenGraph(ctrl, graph);

    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startwctimer(ctrl->InitPartTmr));
    AllocateKWayPartitionMemory(ctrl, coarsest);

    /* Initial partitioning of the coarsest graph. */
    if (ctrl->iptype == METIS_IPTYPE_METISRB) {
      /* The work space is released around the recursive call and
         re-allocated afterwards. */
      FreeWorkSpace(ctrl);
      InitKWayPartitioningRB(ctrl, coarsest);
      AllocateWorkSpace(ctrl, graph);
    }
    else if (ctrl->iptype == METIS_IPTYPE_GROW) {
      AllocateRefinementWorkSpace(ctrl, 2*coarsest->nedges);
      InitKWayPartitioningGrow(ctrl, coarsest);
    }
    else {
      gk_errexit(SIGERR, "Unknown iptype: %d\n", ctrl->iptype);
    }

    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopwctimer(ctrl->InitPartTmr));

    /* NOTE(review): objval is never assigned in this function, so this
       message always reports 0. */
    IFSET(ctrl->dbglvl, METIS_DBG_IPART,
        printf("Initial %"PRIDX"-way partitioning cut: %"PRIDX"\n", ctrl->nparts, objval));

    RefineKWay(ctrl, graph, coarsest);

    /* Score this trial according to the requested objective. */
    if (ctrl->objtype == METIS_OBJTYPE_CUT) {
      trial_obj = graph->mincut;
    }
    else if (ctrl->objtype == METIS_OBJTYPE_VOL) {
      trial_obj = graph->minvol;
    }
    else {
      gk_errexit(SIGERR, "Unknown objtype: %d\n", ctrl->objtype);
    }

    trial_bal = ComputeLoadImbalanceDiff(graph, ctrl->nparts, ctrl->pijbm, ctrl->ubfactors);

    /* Accept the trial if it is the first one, if it is within the 0.0005
       balance threshold and has a better objective, or if the best so far
       exceeds that threshold and this one has a smaller imbalance value. */
    accept = 0;
    if (trial == 0) {
      accept = 1;
    }
    else if (trial_bal <= 0.0005 && best_obj > trial_obj) {
      accept = 1;
    }
    else if (best_bal > 0.0005 && trial_bal < best_bal) {
      accept = 1;
    }

    if (accept) {
      icopy(graph->nvtxs, graph->where, part);
      best_obj = trial_obj;
      best_bal = trial_bal;
    }

    FreeRData(graph);

    /* An objective of zero cannot be improved on. */
    if (best_obj == 0)
      break;

    trial++;
  }

  FreeGraph(&graph);

  return best_obj;
}
/*
 * Reads the CSR matrix named by params->infstem, and for every row queries
 * gk_csr_GetSimilarRows (cosine similarity, at most params->nnbrs hits with
 * similarity threshold params->minsim) against the whole matrix.
 *
 * When params->outfile is set, each hit is written to that file as
 * "<query> <hits[].val> <hits[].key>". The query loop is timed with
 * params->timer_1.
 *
 * NOTE(review): the format strings print mat->rowptr[...] and hits[].val
 * with %d; confirm these fields are really `int` in the GKlib version in
 * use.
 */
void ComputeNeighbors(params_t *params)
{
  int query, h, nfound;
  gk_csr_t *docs;
  int32_t *marker;
  gk_fkv_t *hits, *cand;
  FILE *out = NULL;

  printf("Reading data for %s...\n", params->infstem);

  docs = gk_csr_Read(params->infstem, GK_CSR_FMT_CSR, 1, 0);

  printf("#docs: %d, #nnz: %d.\n", docs->nrows, docs->rowptr[docs->nrows]);

  /* Pre-processing: compact the column space, normalise the rows, and
     build the column (inverted) index. */
  gk_csr_CompactColumns(docs);
  gk_csr_Normalize(docs, GK_CSR_ROW, 2);
  gk_csr_CreateIndex(docs, GK_CSR_COL);

  /* The output file is optional. */
  if (params->outfile)
    out = gk_fopen(params->outfile, "w", "ComputeNeighbors: fpout");

  /* Working arrays, each sized by the number of rows. */
  hits   = gk_fkvmalloc(docs->nrows, "ComputeNeighbors: hits");
  marker = gk_i32smalloc(docs->nrows, -1, "ComputeNeighbors: marker");
  cand   = gk_fkvmalloc(docs->nrows, "ComputeNeighbors: cand");

  gk_startwctimer(params->timer_1);

  query = 0;
  while (query < docs->nrows) {
    if (params->verbosity > 0) {
      printf("Working on query %7d\n", query);
    }

    /* Row `query` itself is the query vector. */
    nfound = gk_csr_GetSimilarRows(docs,
                 docs->rowptr[query+1]-docs->rowptr[query],
                 docs->rowind+docs->rowptr[query],
                 docs->rowval+docs->rowptr[query],
                 GK_CSR_COS, params->nnbrs, params->minsim, hits,
                 marker, cand);

    if (out) {
      h = 0;
      while (h < nfound) {
        fprintf(out, "%8d %8d %.3f\n", query, hits[h].val, hits[h].key);
        h++;
      }
    }

    query++;
  }

  gk_stopwctimer(params->timer_1);

  /* Release everything acquired above. */
  if (out) {
    gk_fclose(out);
  }

  gk_free((void **)&hits, &marker, &cand, LTERM);

  gk_csr_Free(&docs);

  return;
}
/*
 * One-sided FM refinement of a vertex separator.
 *
 * Vertices with where[] == 2 form the separator and pwgts[2] is its weight;
 * where[] values 0 and 1 are the two sides. Each pass moves separator
 * vertices into a single side `to`; neighbours of a moved vertex that lie on
 * the opposite side `other` are pulled into the separator. The side `to`
 * alternates between consecutive passes, hence up to 2*niter passes.
 *
 * Within a pass, moves are recorded in swaps[] and the vertices pulled into
 * the separator by move s are recorded in mind[mptr[s] .. mptr[s+1]-1].
 * After the pass, every move made after the best separator weight was seen
 * (index mincutorder) is undone.
 *
 *   ctrl  - run control (ubfactors, compress, curseed, dbglvl, timers)
 *   graph - graph whose where/pwgts/bndind/bndptr/nrinfo/mincut/nbnd fields
 *           are read and updated in place
 *   niter - number of refinement iterations; each covers both sides
 */
void FM_2WayNodeRefine1Sided(ctrl_t *ctrl, graph_t *graph, idx_t niter) {
  idx_t i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind, iend;
  idx_t *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr;
  idx_t *mptr, *mind, *swaps;
  rpq_t *queue;
  nrinfo_t *rinfo;
  idx_t higain, mincut, initcut, mincutorder;
  idx_t pass, to, other, limit;
  idx_t badmaxpwgt, mindiff, newdiff;
  real_t mult;

  WCOREPUSH;

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  vwgt = graph->vwgt;
  bndind = graph->bndind;
  bndptr = graph->bndptr;
  where = graph->where;
  pwgts = graph->pwgts;
  rinfo = graph->nrinfo;

  queue = rpqCreate(nvtxs);

  /* swaps: order of the moves; mptr/mind: per-move lists of the vertices
     pulled into the separator (needed for rollback) */
  swaps = iwspacemalloc(ctrl, nvtxs);
  mptr = iwspacemalloc(ctrl, nvtxs+1);
  mind = iwspacemalloc(ctrl, 2*nvtxs);

  /* upper bound on the weight a side may reach: half of ubfactors[0] times
     the total weight */
  mult = 0.5*ctrl->ubfactors[0];
  badmaxpwgt = (idx_t)(mult*(pwgts[0]+pwgts[1]+pwgts[2]));

  IFSET(ctrl->dbglvl, METIS_DBG_REFINE, printf("Partitions-N1: [%6"PRIDX" %6"PRIDX"] Nv-Nb[%6"PRIDX" %6"PRIDX"]. ISep: %6"PRIDX"\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut));

  /* `to` is flipped at the top of every pass, so this initial value makes
     the first pass move vertices into the lighter side (side 1 on a tie) */
  to = (pwgts[0] < pwgts[1] ? 1 : 0);
  for (pass=0; pass<2*niter; pass++) { /* the 2*niter is for the two sides */
    other = to;
    to = (to+1)%2;

    rpqReset(queue);

    mincutorder = -1;
    initcut = mincut = graph->mincut;
    nbnd = graph->nbnd;

    /* use the swaps array in place of the traditional perm array to save memory */
    my_irandArrayPermute_r(nbnd, swaps, nbnd, 1, &ctrl->curseed);
    /* queue every boundary (separator) vertex; its priority is its own
       weight minus the weight of its neighbours on the `other` side */
    for (ii=0; ii<nbnd; ii++) {
      i = bndind[swaps[ii]];
      ASSERT(where[i] == 2);
      rpqInsert(queue, i, vwgt[i]-rinfo[i].edegrees[other]);
    }

    ASSERT(CheckNodeBnd(graph, nbnd));
    ASSERT(CheckNodePartitionParams(graph));

    /* number of non-improving moves tolerated before the pass gives up */
    limit = (ctrl->compress ? gk_min(5*nbnd, 500) : gk_min(3*nbnd, 300));

    /******************************************************
    * Get into the FM loop
    *******************************************************/
    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startwctimer(ctrl->Aux3Tmr));
    mptr[0] = nmind = 0;
    mindiff = iabs(pwgts[0]-pwgts[1]);
    for (nswaps=0; nswaps<nvtxs; nswaps++) {
      /* stop when rpqGetTop yields -1 (presumably an empty queue) */
      if ((higain = rpqGetTop(queue)) == -1)
        break;

      ASSERT(bndptr[higain] != -1);

      /* The following check is to ensure we break out if there is a possibility
         of over-running the mind array. */
      if (nmind + xadj[higain+1]-xadj[higain] >= 2*nvtxs-1)
        break;

      if (pwgts[to]+vwgt[higain] > badmaxpwgt)
        break; /* No point going any further. Balance will be bad */

      /* tentatively apply the move's effect on the separator weight */
      pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[other]);

      /* side-weight difference that would result from this move */
      newdiff = iabs(pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other]));
      if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) {
        /* new best: smaller separator, or equal separator with better balance */
        mincut = pwgts[2];
        mincutorder = nswaps;
        mindiff = newdiff;
      } else {
        if (nswaps - mincutorder > 3*limit || (nswaps - mincutorder > limit && pwgts[2] > 1.10*mincut)) {
          /* undo the tentative update above; this move is not performed */
          pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[other]);
          break; /* No further improvement, break out */
        }
      }

      /* commit the move: higain leaves the separator and joins side `to` */
      BNDDelete(nbnd, bndind, bndptr, higain);
      pwgts[to] += vwgt[higain];
      where[higain] = to;
      swaps[nswaps] = higain;

      /**********************************************************
      * Update the degrees of the affected nodes
      ***********************************************************/
      IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startwctimer(ctrl->Aux1Tmr));
      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];
        if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */
          rinfo[k].edegrees[to] += vwgt[higain];
        } else if (where[k] == other) { /* This vertex is pulled into the separator */
          ASSERTP(bndptr[k] == -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", k, bndptr[k], where[k]));
          BNDInsert(nbnd, bndind, bndptr, k);

          mind[nmind++] = k; /* Keep track for rollback */
          where[k] = 2;
          pwgts[other] -= vwgt[k];

          /* recompute k's external degrees from scratch and update the
             separator neighbours that just lost k from the `other` side */
          edegrees = rinfo[k].edegrees;
          edegrees[0] = edegrees[1] = 0;
          for (jj=xadj[k], iend=xadj[k+1]; jj<iend; jj++) {
            kk = adjncy[jj];
            if (where[kk] != 2)
              edegrees[where[kk]] += vwgt[kk];
            else {
              rinfo[kk].edegrees[other] -= vwgt[k];

              /* Since the moves are one-sided this vertex has not been moved yet */
              rpqUpdate(queue, kk, vwgt[kk]-rinfo[kk].edegrees[other]);
            }
          }

          /* Insert the new vertex into the priority queue. Safe due to one-sided moves */
          rpqInsert(queue, k, vwgt[k]-edegrees[other]);
        }
      }
      /* close the mind[] range that belongs to this move */
      mptr[nswaps+1] = nmind;
      IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopwctimer(ctrl->Aux1Tmr));

      IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, printf("Moved %6"PRIDX" to %3"PRIDX", Gain: %5"PRIDX" [%5"PRIDX"] \t[%5"PRIDX" %5"PRIDX" %5"PRIDX"] [%3"PRIDX" %2"PRIDX"]\n", higain, to, (vwgt[higain]-rinfo[higain].edegrees[other]), vwgt[higain], pwgts[0], pwgts[1], pwgts[2], nswaps, limit));
    }
    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopwctimer(ctrl->Aux3Tmr));

    /****************************************************************
    * Roll back computation
    *****************************************************************/
    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startwctimer(ctrl->Aux2Tmr));
    /* undo, newest first, every move made after the best one */
    for (nswaps--; nswaps>mincutorder; nswaps--) {
      higain = swaps[nswaps];

      ASSERT(CheckNodePartitionParams(graph));
      ASSERT(where[higain] == to);

      /* put higain back into the separator
         (INC_DEC presumably adds to its first argument and subtracts from
         its second -- confirm against the macro definition) */
      INC_DEC(pwgts[2], pwgts[to], vwgt[higain]);
      where[higain] = 2;
      BNDInsert(nbnd, bndind, bndptr, higain);

      edegrees = rinfo[higain].edegrees;
      edegrees[0] = edegrees[1] = 0;
      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];
        if (where[k] == 2)
          rinfo[k].edegrees[to] -= vwgt[higain];
        else
          edegrees[where[k]] += vwgt[k];
      }

      /* Push nodes out of the separator */
      for (j=mptr[nswaps]; j<mptr[nswaps+1]; j++) {
        k = mind[j];
        ASSERT(where[k] == 2);
        where[k] = other;
        INC_DEC(pwgts[other], pwgts[2], vwgt[k]);
        BNDDelete(nbnd, bndind, bndptr, k);
        for (jj=xadj[k], iend=xadj[k+1]; jj<iend; jj++) {
          kk = adjncy[jj];
          if (where[kk] == 2)
            rinfo[kk].edegrees[other] += vwgt[k];
        }
      }
    }
    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopwctimer(ctrl->Aux2Tmr));

    ASSERT(mincut == pwgts[2]);

    IFSET(ctrl->dbglvl, METIS_DBG_REFINE, printf("\tMinimum sep: %6"PRIDX" at %5"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd));

    graph->mincut = mincut;
    graph->nbnd = nbnd;

    /* only after an odd-numbered pass (both sides tried): stop if this pass
       kept no move or did not shrink the separator */
    if (pass%2 == 1 && (mincutorder == -1 || mincut >= initcut))
      break;
  }

  rpqDestroy(queue);

  WCOREPOP;
}