/***********************************************************************************
* This function is the entry point of the parallel ordering algorithm.
* This function assumes that the graph is already nicely partitioned among the
* processors and then proceeds to perform recursive bisection.
************************************************************************************/
void PAROMETIS(idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy,
               idxtype *adjwgt, idxtype *order, idxtype *sizes, int *options,
               MPI_Comm comm)
{
  int numflag, newoptions[5];

  newoptions[0] = 1;
  newoptions[PMV3_OPTION_DBGLVL] = options[4];
  newoptions[PMV3_OPTION_SEED]   = GLOBAL_SEED;

  numflag = options[3];

  ParMETIS_V3_NodeND(vtxdist, xadj, adjncy, &numflag, newoptions, order,
                     sizes, &comm);

  options[0] = -1;
}
/***********************************************************************************
* This function is the entry point of the parallel ordering algorithm.
* This function assumes that the graph is already nicely partitioned among the
* processors and then proceeds to perform recursive bisection.
************************************************************************************/
void ParMETIS_NodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy,
                     int *numflag, int *options, idxtype *order, idxtype *sizes,
                     MPI_Comm *comm)
{
  int myoptions[10];

  if (options[0] == 0) {
    myoptions[0] = 0;
  }
  else {
    myoptions[0] = 1;
    myoptions[PMV3_OPTION_DBGLVL] = options[OPTION_DBGLVL];
    myoptions[PMV3_OPTION_SEED]   = GLOBAL_SEED;
    myoptions[PMV3_OPTION_IPART]  = options[OPTION_IPART];
  }

  ParMETIS_V3_NodeND(vtxdist, xadj, adjncy, numflag, myoptions, order, sizes, comm);
}
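/***********************************************************************************
* Illustrative sketch, not part of the wrappers above: a minimal, self-contained
* caller of ParMETIS_V3_NodeND showing the distributed-CSR conventions the
* wrappers assume (vtxdist/xadj/adjncy, C-style numbering, sizes of length
* 2*npes).  It assumes the ParMETIS 4.x idx_t API and exactly two MPI ranks;
* the 6-vertex chain 0-1-2-3-4-5 and its 3/3 split are made-up test data.
************************************************************************************/
#include <mpi.h>
#include <parmetis.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
  MPI_Comm comm;
  int npes, mype, i;

  MPI_Init(&argc, &argv);
  MPI_Comm_dup(MPI_COMM_WORLD, &comm);
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &mype);

  if (npes != 2) { /* this sketch hard-codes a two-rank distribution */
    MPI_Finalize();
    return 1;
  }

  idx_t vtxdist[3] = {0, 3, 6};                 /* rank r owns [vtxdist[r], vtxdist[r+1]) */
  idx_t xadj0[4]   = {0, 1, 3, 5};              /* local CSR on rank 0: vertices 0,1,2 */
  idx_t adjncy0[5] = {1, 0, 2, 1, 3};
  idx_t xadj1[4]   = {0, 2, 4, 5};              /* local CSR on rank 1: vertices 3,4,5 */
  idx_t adjncy1[5] = {2, 4, 3, 5, 4};

  idx_t *xadj   = (mype == 0 ? xadj0   : xadj1);
  idx_t *adjncy = (mype == 0 ? adjncy0 : adjncy1);

  idx_t numflag = 0;                            /* C-style numbering */
  idx_t options[3] = {0, 0, 0};                 /* options[0] = 0: library defaults */
  idx_t order[3];                               /* one entry per local vertex */
  idx_t sizes[4];                               /* 2*npes entries: domains + separators */

  ParMETIS_V3_NodeND(vtxdist, xadj, adjncy, &numflag, options,
                     order, sizes, &comm);

  for (i = 0; i < 3; i++)
    printf("rank %d: global vertex %d -> position %d\n",
           mype, (int)(vtxdist[mype] + i), (int)order[i]);

  MPI_Comm_free(&comm);
  MPI_Finalize();
  return 0;
}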
/*************************************************************************
* Let the game begin
**************************************************************************/
int main(int argc, char *argv[])
{
  idx_t i, j, npes, mype, optype, nparts, adptf, options[10];
  idx_t *part=NULL, *sizes=NULL;
  graph_t graph;
  real_t ipc2redist, *xyz=NULL, *tpwgts=NULL, ubvec[MAXNCON];
  MPI_Comm comm;
  idx_t numflag=0, wgtflag=0, ndims, edgecut;
  char xyzfilename[8192];

  MPI_Init(&argc, &argv);
  MPI_Comm_dup(MPI_COMM_WORLD, &comm);
  gkMPI_Comm_size(comm, &npes);
  gkMPI_Comm_rank(comm, &mype);

  if (argc != 8) {
    if (mype == 0)
      printf("Usage: %s <graph-file> <op-type> <nparts> <adapt-factor> "
             "<ipc2redist> <dbglvl> <seed>\n", argv[0]);
    MPI_Finalize();
    exit(0);
  }

  optype     = atoi(argv[2]);
  nparts     = atoi(argv[3]);
  adptf      = atoi(argv[4]);
  ipc2redist = atof(argv[5]);

  options[0] = 1;
  options[PMV3_OPTION_DBGLVL] = atoi(argv[6]);
  options[PMV3_OPTION_SEED]   = atoi(argv[7]);

  if (mype == 0)
    printf("reading file: %s\n", argv[1]);
  ParallelReadGraph(&graph, argv[1], comm);

  /* Remove the edges for testing */
  /* iset(graph.vtxdist[mype+1]-graph.vtxdist[mype]+1, 0, graph.xadj); */

  rset(graph.ncon, 1.05, ubvec);
  tpwgts = rmalloc(nparts*graph.ncon, "tpwgts");
  rset(nparts*graph.ncon, 1.0/(real_t)nparts, tpwgts);

  /*
  ChangeToFortranNumbering(graph.vtxdist, graph.xadj, graph.adjncy, mype, npes);
  numflag = 1;

  nvtxs  = graph.vtxdist[mype+1]-graph.vtxdist[mype];
  nedges = graph.xadj[nvtxs];
  printf("%"PRIDX" %"PRIDX"\n", isum(nvtxs, graph.xadj, 1), isum(nedges, graph.adjncy, 1));
  */

  if (optype >= 20) {
    sprintf(xyzfilename, "%s.xyz", argv[1]);
    xyz = ReadTestCoordinates(&graph, xyzfilename, &ndims, comm);
  }

  if (mype == 0)
    printf("finished reading file: %s\n", argv[1]);

  part  = ismalloc(graph.nvtxs, mype%nparts, "main: part");
  sizes = imalloc(2*npes, "main: sizes");

  switch (optype) {
    case 1:
      wgtflag = 3;
      ParMETIS_V3_PartKway(graph.vtxdist, graph.xadj, graph.adjncy, graph.vwgt,
          graph.adjwgt, &wgtflag, &numflag, &graph.ncon, &nparts, tpwgts,
          ubvec, options, &edgecut, part, &comm);
      WritePVector(argv[1], graph.vtxdist, part, MPI_COMM_WORLD);
      break;
    case 2:
      wgtflag = 3;
      options[PMV3_OPTION_PSR] = PARMETIS_PSR_COUPLED;
      ParMETIS_V3_RefineKway(graph.vtxdist, graph.xadj, graph.adjncy, graph.vwgt,
          graph.adjwgt, &wgtflag, &numflag, &graph.ncon, &nparts, tpwgts,
          ubvec, options, &edgecut, part, &comm);
      WritePVector(argv[1], graph.vtxdist, part, MPI_COMM_WORLD);
      break;
    case 3:
      options[PMV3_OPTION_PSR] = PARMETIS_PSR_COUPLED;
      graph.vwgt = ismalloc(graph.nvtxs, 1, "main: vwgt");
      if (npes > 1) {
        AdaptGraph(&graph, adptf, comm);
      }
      else {
        wgtflag = 3;
        ParMETIS_V3_PartKway(graph.vtxdist, graph.xadj, graph.adjncy,
            graph.vwgt, graph.adjwgt, &wgtflag, &numflag, &graph.ncon,
            &nparts, tpwgts, ubvec, options, &edgecut, part, &comm);

        printf("Initial partitioning with edgecut of %"PRIDX"\n", edgecut);

        for (i=0; i<graph.ncon; i++) {
          for (j=0; j<graph.nvtxs; j++) {
            if (part[j] == i)
              graph.vwgt[j*graph.ncon+i] = adptf;
            else
              graph.vwgt[j*graph.ncon+i] = 1;
          }
        }
      }

      wgtflag = 3;
      ParMETIS_V3_AdaptiveRepart(graph.vtxdist, graph.xadj, graph.adjncy,
          graph.vwgt, NULL, graph.adjwgt, &wgtflag, &numflag, &graph.ncon,
          &nparts, tpwgts, ubvec, &ipc2redist, options, &edgecut, part, &comm);
      break;
    case 4:
      ParMETIS_V3_NodeND(graph.vtxdist, graph.xadj, graph.adjncy, &numflag,
          options, part, sizes, &comm);
      /* WriteOVector(argv[1], graph.vtxdist, part, comm); */
      break;
    case 5:
      ParMETIS_SerialNodeND(graph.vtxdist, graph.xadj, graph.adjncy, &numflag,
          options, part, sizes, &comm);
      /* WriteOVector(argv[1], graph.vtxdist, part, comm); */
      printf("%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n",
          sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5], sizes[6]);
      break;
    case 11:
      /* TestAdaptiveMETIS(graph.vtxdist, graph.xadj, graph.adjncy, part,
         options, adptf, comm); */
      break;
    case 20:
      wgtflag = 3;
      ParMETIS_V3_PartGeomKway(graph.vtxdist, graph.xadj, graph.adjncy,
          graph.vwgt, graph.adjwgt, &wgtflag, &numflag, &ndims, xyz,
          &graph.ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &comm);
      break;
    case 21:
      ParMETIS_V3_PartGeom(graph.vtxdist, &ndims, xyz, part, &comm);
      break;
  }

  /* printf("%"PRIDX" %"PRIDX"\n", isum(nvtxs, graph.xadj, 1), isum(nedges, graph.adjncy, 1)); */

  gk_free((void **)&part, &sizes, &tpwgts, &graph.vtxdist, &graph.xadj,
      &graph.adjncy, &graph.vwgt, &graph.adjwgt, &xyz, LTERM);

  MPI_Comm_free(&comm);
  MPI_Finalize();

  return 0;
}
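/***********************************************************************************
* Illustrative sketch (hypothetical helper, not part of the driver above): the
* layout of the tpwgts array consumed by the ParMETIS_V3_* partitioners.  Entry
* tpwgts[i*ncon + j] is the target fraction of constraint j assigned to part i,
* and each constraint's entries must sum to 1.0 across the parts.  The uniform
* fill below mirrors main's rset(nparts*graph.ncon, 1.0/nparts, tpwgts) call;
* plain float stands in for the library's real_t.
************************************************************************************/
#include <stdlib.h>

static float *make_uniform_tpwgts(int nparts, int ncon)
{
  float *tpwgts = (float *) malloc((size_t)nparts * ncon * sizeof(float));

  if (tpwgts != NULL) {
    for (int i = 0; i < nparts; i++)      /* part index */
      for (int j = 0; j < ncon; j++)      /* constraint index */
        tpwgts[i*ncon + j] = 1.0f / (float)nparts;
  }
  return tpwgts;
}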
int ORD::find_elim_ordering()
{
  int ws;
  int wr;

  char eoname[512];
  char eoname_other[512];

  // Get size and rank from the communicator
  MPI_Comm_size(comm, &ws);
  MPI_Comm_rank(comm, &wr);

  double xtime = MPI_Wtime();
  sprintf(eoname, "%s.order.%d", this->filename.c_str(), ws);
  sprintf(eoname_other, "%s.order_other.%d", this->filename.c_str(), ws);

  DEBUG("size: %d, rank %d \n", ws, wr);

  int n = G->get_num_nodes();
  int x = n/ws;
  int xm = n%ws;
  int i = 0;
  DEBUG("n: %d x: %d xm: %d \n", n, x, xm);

  vector<int> xadj;
  vector<int> adjncy;
  vector<int> vtxdist(ws + 1, 0);
  vector<int> sizes(2*ws, 0);
  vector<int> ordering(x + 1, 0);
  vector<int> recvcnt(ws, 0);
  vector<int> displ(ws, 0);

  int numflag = 0;
  int options[10];
  options[0] = 0;

  vtxdist[0] = 0;
  for (i = 1; i <= ws; i++)
  {
    vtxdist[i] = vtxdist[i - 1] + x;
    if (i <= xm)
      vtxdist[i]++;
  }

  // preparing displacement and receive counts to use with MPI_Gatherv
  for (i = 0; i < ws; i++)
  {
    recvcnt[i] = x;
    if (i < xm)
      recvcnt[i]++;

    if (i > 0)
      displ[i] += displ[i - 1] + recvcnt[i - 1];
  }

  DEBUG("range: %d, %d\n", vtxdist[wr], vtxdist[wr + 1]);

  int j = 0;
  xadj.push_back(0);
  for (i = vtxdist[wr]; i < vtxdist[wr + 1]; i++)
  {
    Graph::Node *no = G->get_node(i);
    list<int> *l = no->get_nbrs_ptr();
    list<int>::iterator it = l->begin();

    for (; it != l->end(); ++it)
    {
      adjncy.push_back(*it);
      j++;
    }
    xadj.push_back(j);
  }

  if (METIS_OK != ParMETIS_V3_NodeND(&vtxdist.front(), &xadj.front(),
                                     &adjncy.front(), &numflag, options,
                                     &ordering.front(), &sizes.front(), &comm))
  {
    FERROR("error occurred while processing parmetis, aborting\n");
    MPI_Abort(MPI_COMM_WORLD, -1);
  }
  DEBUG("output from ParMETIS\n");

  double parmet_time = MPI_Wtime() - xtime;

  vector<int> recvbuf;
  n = G->get_num_nodes();
  if (wr == 0)
  {
    recvbuf = vector<int>(n, 0);
  }

  if (MPI_SUCCESS != MPI_Gatherv((void *)&ordering.front(), recvcnt[wr], MPI_INT,
                                 (void *)&recvbuf.front(), &recvcnt.front(),
                                 &displ.front(), MPI_INT, 0, comm))
  {
    FERROR("MPI error occurred at Gatherv, Abort!\n");
    MPI_Abort(comm, -1);
  }

  vector<int> eo(n, 0);
  if (wr == 0)
  {
    for (int i = 0; i < n; i++)
      eo[recvbuf[i]] = i;

    FILE *f = fopen(eoname_other, "w");
    for (int i = 0; i < n; i++)
      fprintf(f, "%d\n", eo[i] + 1);
    fclose(f);
    DEBUG("ParMetis NodeND elimination ordering is in : %s\n", eoname_other);
  }

  ordering.clear();
  ordering.resize(recvcnt[wr], 0);

  if (MPI_SUCCESS != MPI_Scatterv((void *)&eo.front(), &recvcnt.front(),
                                  &displ.front(), MPI_INT,
                                  (void *)&ordering.front(), recvcnt[wr],
                                  MPI_INT, 0, comm))
  {
    FERROR("MPI error occurred at Scatterv, Abort!\n");
    MPI_Abort(comm, -1);
  }

  DEBUG("Scatterv completed\n");

  Graph::GraphCreatorFile gf;
  Graph::VertexWeightedGraph *wg;
  Graph::GraphEOUtil eoutil;
  Graph::GraphProperties prop;
  list<int> members(ordering.begin(), ordering.end());

  wg = gf.create_component(G, &members, false);
  prop.make_canonical(wg);

  vector<int> ord(recvcnt[wr], 0);
  vector<int> ordsend(recvcnt[wr], 0);
  double xxtime = MPI_Wtime();
  eoutil.find_elimination_ordering(wg, &ord, GD_AMD, false);
  DEBUG("eo time : %f\n", MPI_Wtime() - xxtime);

  int sz = recvcnt[wr];
  for (int i = 0; i < sz; i++)
    ordsend[i] = wg->get_node(ord[i])->get_label();

  recvbuf.assign(n, -1);

  if (MPI_SUCCESS != MPI_Gatherv((void *)&ordsend.front(), recvcnt[wr], MPI_INT,
                                 (void *)&recvbuf.front(), &recvcnt.front(),
                                 &displ.front(), MPI_INT, 0, comm))
  {
    FERROR("MPI error occurred at Gatherv, Abort!\n");
    MPI_Abort(comm, -1);
  }

  double p_amd_time = MPI_Wtime() - xtime;

  if (wr == 0)
  {
    FILE *f = fopen(eoname, "w");
    for (int i = 0; i < n; i++)
      fprintf(f, "%d\n", recvbuf[i]);
    fclose(f);
  }

  DEBUG("ordering is written into %s\n", eoname);
  DEBUG("%f,%f\n", parmet_time, p_amd_time);

  return 0;
}
int Zoltan_ParMetis_Order(
  ZZ *zz,               /* Zoltan structure */
  int num_obj,          /* Number of (local) objects to order. */
  ZOLTAN_ID_PTR gids,   /* List of global ids (local to this proc) */
                        /* The application must allocate enough space */
  ZOLTAN_ID_PTR lids,   /* List of local ids (local to this proc) */
                        /* The application must allocate enough space */
  ZOLTAN_ID_PTR rank,   /* rank[i] is the rank of gids[i] */
  int *iperm,
  ZOOS *order_opt       /* Ordering options, parsed by Zoltan_Order */
)
{
  static char *yo = "Zoltan_ParMetis_Order";
  int i, n, ierr;
  ZOLTAN_Output_Order ord;
  ZOLTAN_Third_Graph gr;
#ifdef ZOLTAN_PARMETIS
  MPI_Comm comm = zz->Communicator; /* don't want to risk letting external
                                       packages change our communicator */
#endif
  indextype numflag = 0;

  int timer_p = 0;
  int get_times = 0;
  int use_timers = 0;
  double times[5];

  ZOLTAN_ID_PTR l_gids = NULL;
  ZOLTAN_ID_PTR l_lids = NULL;

  indextype options[MAX_PARMETIS_OPTIONS];
  char alg[MAX_PARAM_STRING_LEN];

  ZOLTAN_TRACE_ENTER(zz, yo);

#ifdef ZOLTAN_PARMETIS
#if TPL_USE_DATATYPE != TPL_METIS_DATATYPES

#ifdef TPL_FLOAT_WEIGHT
  i = 1;
#else
  i = 0;
#endif

  if ((sizeof(indextype) != sizeof(idxtype)) ||
      (sizeof(weighttype) != sizeof(idxtype)) || i) {

    ZOLTAN_THIRD_ERROR(ZOLTAN_FATAL,
          "Not supported: Multiple 3rd party libraries with incompatible "
          "data types.");
    return ZOLTAN_FATAL;
  }
#endif
#endif

  memset(&gr, 0, sizeof(ZOLTAN_Third_Graph));
  memset(&ord, 0, sizeof(ZOLTAN_Output_Order));
  memset(times, 0, sizeof(times));

  ord.order_opt = order_opt;

  if (!order_opt) {
    /* If for some reason order_opt is NULL, allocate a new ZOOS here. */
    /* This should really never happen. */
    order_opt = (ZOOS *) ZOLTAN_MALLOC(sizeof(ZOOS));
    strcpy(order_opt->method, "PARMETIS");
  }

  ierr = Zoltan_Parmetis_Parse(zz, options, alg, NULL, NULL, &ord);
  /* ParMetis only computes the rank vector */
  order_opt->return_args = RETURN_RANK;

  /* Check that num_obj equals the number of objects on this proc. */
  /* This constraint may be removed in the future. */
  n = zz->Get_Num_Obj(zz->Get_Num_Obj_Data, &ierr);
  if ((ierr != ZOLTAN_OK) && (ierr != ZOLTAN_WARN)) {
    /* Return error code */
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Get_Num_Obj returned error.");
    return (ZOLTAN_FATAL);
  }
  if (n != num_obj) {
    /* Currently this is a fatal error. */
    ZOLTAN_PRINT_ERROR(zz->Proc, yo,
        "Input num_obj does not equal the number of objects.");
    return (ZOLTAN_FATAL);
  }

  /* Do not use weights for ordering */
  gr.obj_wgt_dim = -1;
  gr.edge_wgt_dim = -1;
  gr.num_obj = num_obj;

  /* Check what ordering type is requested */
  if (order_opt) {
    SET_GLOBAL_GRAPH(&gr.graph_type); /* GLOBAL by default */

#ifdef ZOLTAN_PARMETIS
    if ((strcmp(order_opt->method, "METIS") == 0))
#endif /* ZOLTAN_PARMETIS */
      SET_LOCAL_GRAPH(&gr.graph_type);
  }
  gr.get_data = 1;

  if (IS_LOCAL_GRAPH(gr.graph_type) && zz->Num_Proc > 1) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo,
        "Serial ordering on more than 1 process: set ParMetis instead.");
    return (ZOLTAN_FATAL);
  }

  timer_p = Zoltan_Preprocess_Timer(zz, &use_timers);

  /* Start timer */
  get_times = (zz->Debug_Level >= ZOLTAN_DEBUG_ATIME);
  if (get_times) {
    MPI_Barrier(zz->Communicator);
    times[0] = Zoltan_Time(zz->Timer);
  }

  ierr = Zoltan_Preprocess_Graph(zz, &l_gids, &l_lids, &gr, NULL, NULL, NULL);
  if ((ierr != ZOLTAN_OK) && (ierr != ZOLTAN_WARN)) {
    Zoltan_Third_Exit(&gr, NULL, NULL, NULL, NULL, NULL);
    return (ierr);
  }

  /* Allocate space for separator sizes */
  if (IS_GLOBAL_GRAPH(gr.graph_type)) {
    if (Zoltan_TPL_Order_Init_Tree(&zz->TPL_Order, 2*zz->Num_Proc,
                                   zz->Num_Proc) != ZOLTAN_OK) {
      /* Not enough memory */
      Zoltan_Third_Exit(&gr, NULL, NULL, NULL, NULL, &ord);
      ZOLTAN_THIRD_ERROR(ZOLTAN_MEMERR, "Out of memory.");
    }
    ord.sep_sizes =
      (indextype *) ZOLTAN_MALLOC((2*zz->Num_Proc+1)*sizeof(indextype));
    if (ord.sep_sizes == NULL) {
      Zoltan_Third_Exit(&gr, NULL, NULL, NULL, NULL, &ord);
      ZOLTAN_THIRD_ERROR(ZOLTAN_MEMERR, "Out of memory.");
    }
    memset(ord.sep_sizes, 0, (2*zz->Num_Proc+1)*sizeof(indextype));
    /* It seems ParMETIS doesn't initialize this correctly */
  }

  /* Allocate space for direct perm */
  ord.rank = (indextype *) ZOLTAN_MALLOC(gr.num_obj*sizeof(indextype));
  if (!ord.rank) {
    /* Not enough memory */
    Zoltan_Third_Exit(&gr, NULL, NULL, NULL, NULL, &ord);
    ZOLTAN_THIRD_ERROR(ZOLTAN_MEMERR, "Out of memory.");
  }

  if (IS_LOCAL_GRAPH(gr.graph_type)) {
    /* Allocate space for inverse perm */
    ord.iperm = (indextype *) ZOLTAN_MALLOC(gr.num_obj*sizeof(indextype));
    if (!ord.iperm) {
      /* Not enough memory */
      Zoltan_Third_Exit(&gr, NULL, NULL, NULL, NULL, &ord);
      ZOLTAN_THIRD_ERROR(ZOLTAN_MEMERR, "Out of memory.");
    }
  }
  else
    ord.iperm = NULL;

  /* Get a time here */
  if (get_times)
    times[1] = Zoltan_Time(zz->Timer);

#ifdef ZOLTAN_PARMETIS
  if (IS_GLOBAL_GRAPH(gr.graph_type)) {
    ZOLTAN_TRACE_DETAIL(zz, yo, "Calling the ParMETIS library");

    ParMETIS_V3_NodeND(gr.vtxdist, gr.xadj, gr.adjncy, &numflag, options,
                       ord.rank, ord.sep_sizes, &comm);
    ZOLTAN_TRACE_DETAIL(zz, yo, "Returned from the ParMETIS library");
  }
  else
#endif /* ZOLTAN_PARMETIS */
#if defined(ZOLTAN_METIS) || defined(ZOLTAN_PARMETIS)
  if (IS_LOCAL_GRAPH(gr.graph_type)) {
    /* Be careful: permutation parameters are in the opposite order */
    indextype numobj = gr.num_obj;

    ZOLTAN_TRACE_DETAIL(zz, yo, "Calling the METIS library");
    order_opt->return_args = RETURN_RANK | RETURN_IPERM;
    /* We provide directly all the permutations */

#if !defined(METIS_VER_MAJOR) || METIS_VER_MAJOR < 5
    options[0] = 0; /* Use default options for METIS. */
    METIS_NodeND(&numobj, gr.xadj, gr.adjncy, &numflag, options,
                 ord.iperm, ord.rank);
#else
    METIS_SetDefaultOptions(options);
    METIS_NodeND(&numobj, gr.xadj, gr.adjncy, NULL, options,
                 ord.iperm, ord.rank); /* NULL is vwgt -- new interface in METIS 5 */
#endif

    ZOLTAN_TRACE_DETAIL(zz, yo, "Returned from the METIS library");
  }
#endif /* ZOLTAN_METIS */

  /* Get a time here */
  if (get_times)
    times[2] = Zoltan_Time(zz->Timer);

  if (IS_GLOBAL_GRAPH(gr.graph_type)) { /* Update Elimination tree */
    int numbloc;
    int start;
    int leaf;
    int *converttab;
    int levelmax;

    levelmax = mylog2(zz->Num_Proc) + 1;
    converttab = (int *) ZOLTAN_MALLOC(zz->Num_Proc*2*sizeof(int));

    memset(converttab, 0, zz->Num_Proc*2*sizeof(int));
    /* Determine the first node in each separator, store it in zz->TPL_Order.start */
    for (numbloc = 0, start = 0, leaf = 0; numbloc < zz->Num_Proc/2; numbloc++) {
      int father;

      father = zz->Num_Proc + numbloc;
      converttab[start] = 2*numbloc;
      zz->TPL_Order.leaves[leaf++] = start;
      zz->TPL_Order.ancestor[start] = start + 2;
      converttab[start+1] = 2*numbloc + 1;
      zz->TPL_Order.leaves[leaf++] = start + 1;
      zz->TPL_Order.ancestor[start+1] = start + 2;
      start += 2;
      do {
        converttab[start] = father;
        if (father % 2 == 0) {
          int nextoffset;
          int level;

          level = mylog2(2*zz->Num_Proc - 1 - father);
          nextoffset = (1 << (levelmax - level));
          zz->TPL_Order.ancestor[start] = start + nextoffset;
          start++;
          break;
        }
        else {
          zz->TPL_Order.ancestor[start] = start + 1;
          start++;
          father = zz->Num_Proc + father/2;
        }
      } while (father < 2*zz->Num_Proc - 1);
    }

    zz->TPL_Order.start[0] = 0;
    zz->TPL_Order.ancestor[2*zz->Num_Proc - 2] = -1;
    for (numbloc = 1; numbloc < 2*zz->Num_Proc; numbloc++) {
      int oldblock = converttab[numbloc-1];
      zz->TPL_Order.start[numbloc] =
        zz->TPL_Order.start[numbloc-1] + ord.sep_sizes[oldblock];
    }

    ZOLTAN_FREE(&converttab);
    ZOLTAN_FREE(&ord.sep_sizes);

    zz->TPL_Order.leaves[zz->Num_Proc] = -1;
    zz->TPL_Order.nbr_leaves = zz->Num_Proc;
    zz->TPL_Order.nbr_blocks = 2*zz->Num_Proc - 1;
  }
  else { /* No tree */
    zz->TPL_Order.nbr_blocks = 0;
    zz->TPL_Order.start = NULL;
    zz->TPL_Order.ancestor = NULL;
    zz->TPL_Order.leaves = NULL;
  }

  /* Correct because no redistribution */
  memcpy(gids, l_gids, n*zz->Num_GID*sizeof(ZOLTAN_ID_TYPE));
  memcpy(lids, l_lids, n*zz->Num_LID*sizeof(ZOLTAN_ID_TYPE));

  ierr = Zoltan_Postprocess_Graph(zz, l_gids, l_lids, &gr, NULL, NULL, NULL,
                                  &ord, NULL);

  ZOLTAN_FREE(&l_gids);
  ZOLTAN_FREE(&l_lids);

  /* Get a time here */
  if (get_times)
    times[3] = Zoltan_Time(zz->Timer);

  if (get_times)
    Zoltan_Third_DisplayTime(zz, times);

  if (use_timers)
    ZOLTAN_TIMER_STOP(zz->ZTime, timer_p, zz->Communicator);

  if (sizeof(indextype) == sizeof(ZOLTAN_ID_TYPE)) {
    memcpy(rank, ord.rank, gr.num_obj*sizeof(indextype));
  }
  else {
    for (i = 0; i < gr.num_obj; i++) {
      rank[i] = (ZOLTAN_ID_TYPE) ord.rank[i];
    }
  }

  if ((ord.iperm != NULL) && (iperm != NULL)) {
    if (sizeof(indextype) == sizeof(int)) {
      memcpy(iperm, ord.iperm, gr.num_obj*sizeof(indextype));
    }
    else {
      for (i = 0; i < gr.num_obj; i++) {
        iperm[i] = (int) ord.iperm[i];
      }
    }
  }

  if (ord.iperm != NULL)
    ZOLTAN_FREE(&ord.iperm);
  ZOLTAN_FREE(&ord.rank);

  /* Free all other "graph" stuff */
  Zoltan_Third_Exit(&gr, NULL, NULL, NULL, NULL, NULL);

  ZOLTAN_TRACE_EXIT(zz, yo);

  return (ZOLTAN_OK);
}
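/***********************************************************************************
* Illustrative sketch (hypothetical helper, not Zoltan code): the relationship
* between the two permutations handled above.  Per the header comment, rank[i]
* is the new position of object i, while iperm maps a new position back to the
* old one, so each array is the inverse of the other -- which is why the METIS
* branch can hand METIS_NodeND its (perm, iperm) pair "in the opposite order".
************************************************************************************/
static void rank_to_iperm(int n, const int *rank, int *iperm)
{
  int i;

  for (i = 0; i < n; i++)
    iperm[rank[i]] = i;   /* object i lands at new position rank[i] */
}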
/*! \brief
 *
 * <pre>
 * Purpose
 * =======
 *
 * GET_PERM_C_PARMETIS obtains a permutation matrix Pc, by applying a
 * graph partitioning algorithm to the symmetrized graph A+A'.  The
 * multilevel graph partitioning algorithm used is the
 * ParMETIS_V3_NodeND routine available in the parallel graph
 * partitioning package parMETIS.
 *
 * The number of independent sub-domains noDomains computed by this
 * algorithm has to be a power of 2.  Hence noDomains is the largest
 * power of 2 that is no greater than nprocs_i, where nprocs_i = nprow
 * * npcol is the number of processors used in SuperLU_DIST.
 *
 * Arguments
 * =========
 *
 * A       (input) SuperMatrix*
 *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol).  The number
 *         of the linear equations is A->nrow.  Matrix A is distributed
 *         in NRformat_loc format.
 *
 * perm_r  (input) int_t*
 *         Row permutation vector of size A->nrow, which defines the
 *         permutation matrix Pr; perm_r[i] = j means row i of A is in
 *         position j in Pr*A.
 *
 * perm_c  (output) int_t*
 *         Column permutation vector of size A->ncol, which defines the
 *         permutation matrix Pc; perm_c[i] = j means column i of A is
 *         in position j in A*Pc.
 *
 * nprocs_i (input) int
 *         Number of processors the input matrix is distributed on in a block
 *         row format.  It corresponds to the number of processors used in
 *         SuperLU_DIST.
 *
 * noDomains (input) int, must be a power of 2
 *         Number of independent domains to be computed by the graph
 *         partitioning algorithm.  ( noDomains <= nprocs_i )
 *
 * sizes   (output) int_t**, of size 2 * noDomains
 *         Returns pointer to an array containing the number of nodes
 *         for each sub-domain and each separator.  Separators are stored
 *         from left to right.
 *         Memory for the array is allocated in this routine.
 *
 * fstVtxSep (output) int_t**, of size 2 * noDomains
 *         Returns pointer to an array containing the first node for each
 *         sub-domain and each separator.
 *         Memory for the array is allocated in this routine.
 *
 * Return value
 * ============
 *   < 0, number of bytes allocated on return from the symbolic factorization.
 *   > 0, number of bytes allocated when out of memory.
 * </pre>
 */
float
get_perm_c_parmetis(SuperMatrix *A, int_t *perm_r, int_t *perm_c,
                    int nprocs_i, int noDomains, int_t **sizes,
                    int_t **fstVtxSep, gridinfo_t *grid, MPI_Comm *metis_comm)
{
  NRformat_loc *Astore;
  int iam, p;
#if 0
  int *b_rowptr_int, *b_colind_int, *l_sizes_int, *dist_order_int, *vtxdist_o_int;
  int *options, numflag;
#else /* 64-bit integers */
  int_t options[4] = {0, 0, 0, 1}, numflag;
#endif
  int_t m_loc, fst_row;
  int_t m, n, bnz, i, j;
  int_t *rowptr, *colind, *l_fstVtxSep, *l_sizes;
  int_t *b_rowptr, *b_colind;
  int_t *dist_order;
  int *recvcnts, *displs;
  /* first row index on each processor when the matrix is distributed on
     nprocs (vtxdist_i) or noDomains processors (vtxdist_o) */
  int_t *vtxdist_i, *vtxdist_o;
  int_t szSep, k, noNodes;
  float apat_mem_l; /* memory used during the computation of the graph of A+A' */
  float mem;        /* Memory used during this routine */
  MPI_Status status;

  /* Initialization. */
  MPI_Comm_rank(grid->comm, &iam);
  n = A->ncol;
  m = A->nrow;
  if (m != n) ABORT("Matrix is not square");
  mem = 0.;

#if ( DEBUGlevel>=1 )
  CHECK_MALLOC(iam, "Enter get_perm_c_parmetis()");
#endif

  Astore = (NRformat_loc *) A->Store;
  m_loc = Astore->m_loc;     /* number of rows local to this processor */
  fst_row = Astore->fst_row; /* global index of the first row */
  rowptr = Astore->rowptr;   /* pointer to rows and column indices */
  colind = Astore->colind;

#if ( PRNTlevel>=1 )
  if (!iam) printf(".. Use parMETIS ordering on A'+A with %d sub-domains.\n",
                   noDomains);
#endif

  numflag = 0;

  /* determine first row on each processor */
  vtxdist_i = (int_t *) SUPERLU_MALLOC((nprocs_i+1) * sizeof(int_t));
  if (!vtxdist_i) ABORT("SUPERLU_MALLOC fails for vtxdist_i.");
  vtxdist_o = (int_t *) SUPERLU_MALLOC((nprocs_i+1) * sizeof(int_t));
  if (!vtxdist_o) ABORT("SUPERLU_MALLOC fails for vtxdist_o.");

  MPI_Allgather(&fst_row, 1, mpi_int_t, vtxdist_i, 1, mpi_int_t, grid->comm);
  vtxdist_i[nprocs_i] = m;

  if (noDomains == nprocs_i) {
    /* keep the same distribution of A */
    for (p = 0; p <= nprocs_i; p++)
      vtxdist_o[p] = vtxdist_i[p];
  }
  else {
    i = n / noDomains;
    j = n % noDomains;
    for (k = 0, p = 0; p < noDomains; p++) {
      vtxdist_o[p] = k;
      k += i;
      if (p < j) k++;
    }
    /* The remaining non-participating processors get the same
       first-row-number as the last processor. */
    for (p = noDomains; p <= nprocs_i; p++)
      vtxdist_o[p] = k;
  }

#if ( DEBUGlevel>=2 )
  if (!iam) PrintInt10("vtxdist_o", nprocs_i + 1, vtxdist_o);
#endif

  /* Compute distributed A + A' */
  if ((apat_mem_l = a_plus_at_CompRow_loc(iam, perm_r, nprocs_i, vtxdist_i,
                                          n, rowptr, colind, noDomains,
                                          vtxdist_o, &bnz, &b_rowptr,
                                          &b_colind, grid)) > 0)
    return (apat_mem_l);
  mem += -apat_mem_l;

  /* Initialize and allocate storage for parMetis. */
  (*sizes) = (int_t *) SUPERLU_MALLOC(2 * noDomains * sizeof(int_t));
  if (!(*sizes)) ABORT("SUPERLU_MALLOC fails for sizes.");
  l_sizes = *sizes;
  (*fstVtxSep) = (int_t *) SUPERLU_MALLOC(2 * noDomains * sizeof(int_t));
  if (!(*fstVtxSep)) ABORT("SUPERLU_MALLOC fails for fstVtxSep.");
  l_fstVtxSep = *fstVtxSep;
  m_loc = vtxdist_o[iam+1] - vtxdist_o[iam];

  if (iam < noDomains)
    /* dist_order is the perm returned by parMetis, distributed */
    if (!(dist_order = (int_t *) SUPERLU_MALLOC(m_loc * sizeof(int_t))))
      ABORT("SUPERLU_MALLOC fails for dist_order.");

#if 0
  /* ParMETIS represents the column pointers and row indices of
   * the input matrix using integers.  When SuperLU_DIST uses
   * long int for the int_t type, then several supplementary
   * copies need to be performed in order to call ParMETIS. */
#if defined (_LONGINT)
  l_sizes_int = (int *) SUPERLU_MALLOC(2 * noDomains * sizeof(int));
  if (!(l_sizes_int)) ABORT("SUPERLU_MALLOC fails for l_sizes_int.");

  /* Allocate storage */
  if (!(b_rowptr_int = (int *) SUPERLU_MALLOC((m_loc+1) * sizeof(int))))
    ABORT("SUPERLU_MALLOC fails for b_rowptr_int[]");
  for (i = 0; i <= m_loc; i++)
    b_rowptr_int[i] = b_rowptr[i];
  SUPERLU_FREE(b_rowptr);

  if (bnz) {
    if (!(b_colind_int = (int *) SUPERLU_MALLOC(bnz * sizeof(int))))
      ABORT("SUPERLU_MALLOC fails for b_colind_int[]");
    for (i = 0; i < bnz; i++)
      b_colind_int[i] = b_colind[i];
    SUPERLU_FREE(b_colind);
  }

  if (!(vtxdist_o_int = (int *) SUPERLU_MALLOC((nprocs_i+1) * sizeof(int))))
    ABORT("SUPERLU_MALLOC fails for vtxdist_o_int.");
  for (i = 0; i <= nprocs_i; i++)
    vtxdist_o_int[i] = vtxdist_o[i];
  SUPERLU_FREE(vtxdist_o);

#else /* Default */
  vtxdist_o_int = vtxdist_o;
  b_rowptr_int = b_rowptr;
  b_colind_int = b_colind;
  l_sizes_int = l_sizes;
#endif
#endif

  if (iam < noDomains) {
    ParMETIS_V3_NodeND(vtxdist_o, b_rowptr, b_colind, &numflag, options,
                       dist_order, l_sizes, metis_comm);
  }

  if (bnz) SUPERLU_FREE(b_colind);
  SUPERLU_FREE(b_rowptr);

#if 0
  if (iam < noDomains) {
    SUPERLU_FREE(options);
  }

#if defined (_LONGINT)
  /* Copy data from dist_order_int to dist_order */
  if (iam < noDomains) {
    /* dist_order is the perm returned by parMetis, distributed */
    if (!(dist_order = (int_t *) SUPERLU_MALLOC(m_loc * sizeof(int_t))))
      ABORT("SUPERLU_MALLOC fails for dist_order.");
    for (i = 0; i < m_loc; i++)
      dist_order[i] = dist_order_int[i];
    SUPERLU_FREE(dist_order_int);

    for (i = 0; i < 2*noDomains; i++)
      l_sizes[i] = l_sizes_int[i];
    SUPERLU_FREE(l_sizes_int);
  }
#else
  dist_order = dist_order_int;
#endif
#endif

  /* Allgatherv dist_order to get perm_c */
  if (!(displs = (int *) SUPERLU_MALLOC(nprocs_i * sizeof(int))))
    ABORT("SUPERLU_MALLOC fails for displs.");
  if (!(recvcnts = (int *) SUPERLU_MALLOC(nprocs_i * sizeof(int))))
    ABORT("SUPERLU_MALLOC fails for recvcnts.");
  for (i = 0; i < nprocs_i; i++)
    recvcnts[i] = vtxdist_o[i+1] - vtxdist_o[i];
  displs[0] = 0;
  for (i = 1; i < nprocs_i; i++)
    displs[i] = displs[i-1] + recvcnts[i-1];

  MPI_Allgatherv(dist_order, m_loc, mpi_int_t, perm_c, recvcnts, displs,
                 mpi_int_t, grid->comm);

  if (iam < noDomains) {
    SUPERLU_FREE(dist_order);
  }
  SUPERLU_FREE(vtxdist_i);
  SUPERLU_FREE(vtxdist_o);
  SUPERLU_FREE(recvcnts);
  SUPERLU_FREE(displs);

  /* send l_sizes to every processor p >= noDomains */
  if (!iam)
    for (p = noDomains; p < nprocs_i; p++)
      MPI_Send(l_sizes, 2*noDomains, mpi_int_t, p, 0, grid->comm);
  if (noDomains <= iam && iam < nprocs_i)
    MPI_Recv(l_sizes, 2*noDomains, mpi_int_t, 0, 0, grid->comm, &status);

  /* Determine the first node in each separator, store it in l_fstVtxSep */
  for (j = 0; j < 2 * noDomains; j++)
    l_fstVtxSep[j] = 0;
  l_fstVtxSep[2*noDomains - 2] = l_sizes[2*noDomains - 2];
  szSep = noDomains;
  i = 0;
  while (szSep != 1) {
    for (j = i; j < i + szSep; j++) {
      l_fstVtxSep[j] += l_sizes[j];
    }
    for (j = i; j < i + szSep; j++) {
      k = i + szSep + (j-i) / 2;
      l_fstVtxSep[k] += l_fstVtxSep[j];
    }
    i += szSep;
    szSep = szSep / 2;
  }

  l_fstVtxSep[2 * noDomains - 2] -= l_sizes[2 * noDomains - 2];
  i = 2 * noDomains - 2;
  szSep = 1;
  while (i > 0) {
    for (j = i; j < i + szSep; j++) {
      k = (i - 2 * szSep) + (j-i) * 2 + 1;
      noNodes = l_fstVtxSep[k];
      l_fstVtxSep[k] = l_fstVtxSep[j] - l_sizes[k];
      l_fstVtxSep[k-1] = l_fstVtxSep[k] + l_sizes[k] - noNodes - l_sizes[k-1];
    }
    szSep *= 2;
    i -= szSep;
  }

#if ( PRNTlevel>=2 )
  if (!iam) {
    PrintInt10("Sizes of separators", 2 * noDomains-1, l_sizes);
    PrintInt10("First Vertex Separator", 2 * noDomains-1, l_fstVtxSep);
  }
#endif

#if ( DEBUGlevel>=1 )
  CHECK_MALLOC(iam, "Exit get_perm_c_parmetis()");
#endif

  return (-mem);
} /* get_perm_c_parmetis */
/***********************************************************************************
* This function is the testing routine for the adaptive multilevel partitioning
* code.  It computes a partition from scratch, then moves the graph, changes
* some of the vertex weights, and calls the adaptive code.
************************************************************************************/
void TestParMetis_V3(char *filename, MPI_Comm comm)
{
  int ncon, nparts, npes, mype, opt2, realcut;
  GraphType graph, mgraph;
  idxtype *part, *mpart, *savepart, *order, *sizes;
  int numflag=0, wgtflag=0, options[10], edgecut, ndims;
  float ipc2redist, *xyz, *tpwgts = NULL, ubvec[MAXNCON];

  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &mype);

  ndims = 2;

  ParallelReadGraph(&graph, filename, comm);
  xyz = ReadTestCoordinates(&graph, filename, 2, comm);
  MPI_Barrier(comm);

  part   = idxmalloc(graph.nvtxs, "TestParMetis_V3: part");
  tpwgts = fmalloc(MAXNCON*npes*2, "TestParMetis_V3: tpwgts");
  sset(MAXNCON, 1.05, ubvec);

  graph.vwgt = idxsmalloc(graph.nvtxs*5, 1, "TestParMetis_V3: vwgt");

  /*======================================================================
  / ParMETIS_V3_PartKway
  /=======================================================================*/
  options[0] = 1;
  options[1] = 3;
  options[2] = 1;
  wgtflag = 2;
  numflag = 0;
  edgecut = 0;

  for (nparts=2*npes; nparts>=npes/2 && nparts > 0; nparts = nparts/2) {
    for (ncon=1; ncon<=5; ncon+=2) {

      if (ncon > 1 && nparts > 1)
        Mc_AdaptGraph(&graph, part, ncon, nparts, comm);
      else
        idxset(graph.nvtxs, 1, graph.vwgt);

      for (opt2=1; opt2<=2; opt2++) {
        options[2] = opt2;

        sset(nparts*ncon, 1.0/(float)nparts, tpwgts);
        if (mype == 0)
          printf("\nTesting ParMETIS_V3_PartKway with options[1-2] = {%d %d}, Ncon: %d, Nparts: %d\n",
              options[1], options[2], ncon, nparts);

        ParMETIS_V3_PartKway(graph.vtxdist, graph.xadj, graph.adjncy,
            graph.vwgt, NULL, &wgtflag, &numflag, &ncon, &nparts, tpwgts,
            ubvec, options, &edgecut, part, &comm);

        if (mype == 0) {
          printf("ParMETIS_V3_PartKway reported a cut of %d\n", edgecut);
        }
      }
    }
  }

  /*======================================================================
  / ParMETIS_V3_PartGeomKway
  /=======================================================================*/
  options[0] = 1;
  options[1] = 3;
  wgtflag = 2;
  numflag = 0;

  for (nparts=2*npes; nparts>=npes/2 && nparts > 0; nparts = nparts/2) {
    for (ncon=1; ncon<=5; ncon+=2) {

      if (ncon > 1)
        Mc_AdaptGraph(&graph, part, ncon, nparts, comm);
      else
        idxset(graph.nvtxs, 1, graph.vwgt);

      for (opt2=1; opt2<=2; opt2++) {
        options[2] = opt2;

        sset(nparts*ncon, 1.0/(float)nparts, tpwgts);
        if (mype == 0)
          printf("\nTesting ParMETIS_V3_PartGeomKway with options[1-2] = {%d %d}, Ncon: %d, Nparts: %d\n",
              options[1], options[2], ncon, nparts);

        ParMETIS_V3_PartGeomKway(graph.vtxdist, graph.xadj, graph.adjncy,
            graph.vwgt, NULL, &wgtflag, &numflag, &ndims, xyz, &ncon, &nparts,
            tpwgts, ubvec, options, &edgecut, part, &comm);

        if (mype == 0) {
          printf("ParMETIS_V3_PartGeomKway reported a cut of %d\n", edgecut);
        }
      }
    }
  }

  /*======================================================================
  / ParMETIS_V3_PartGeom
  /=======================================================================*/
  wgtflag = 0;
  numflag = 0;
  if (mype == 0)
    printf("\nTesting ParMETIS_V3_PartGeom\n");

  /* ParMETIS_V3_PartGeom(graph.vtxdist, &ndims, xyz, part, &comm); */

  if (mype == 0)
    printf("ParMETIS_V3_PartGeom partition complete\n");

  /*
  realcut = ComputeRealCut(graph.vtxdist, part, filename, comm);
  if (mype == 0)
    printf("ParMETIS_V3_PartGeom reported a cut of %d\n", realcut);
  */

  /*======================================================================
  / ParMETIS_V3_RefineKway
  /=======================================================================*/
  options[0] = 1;
  options[1] = 3;
  options[2] = 1;
  options[3] = COUPLED;
  nparts = npes;
  wgtflag = 0;
  numflag = 0;
  ncon = 1;

  sset(nparts*ncon, 1.0/(float)nparts, tpwgts);

  if (mype == 0)
    printf("\nTesting ParMETIS_V3_RefineKway with default options (before move)\n");

  ParMETIS_V3_RefineKway(graph.vtxdist, graph.xadj, graph.adjncy, NULL, NULL,
      &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut,
      part, &comm);

  MALLOC_CHECK(NULL);

  if (mype == 0) {
    printf("ParMETIS_V3_RefineKway reported a cut of %d\n", edgecut);
  }

  MALLOC_CHECK(NULL);

  /* Compute a good partition and move the graph. Do so quietly! */
  options[0] = 0;
  nparts = npes;
  wgtflag = 0;
  numflag = 0;
  ncon = 1;
  sset(nparts*ncon, 1.0/(float)nparts, tpwgts);
  ParMETIS_V3_PartKway(graph.vtxdist, graph.xadj, graph.adjncy, NULL, NULL,
      &wgtflag, &numflag, &ncon, &npes, tpwgts, ubvec, options, &edgecut,
      part, &comm);
  TestMoveGraph(&graph, &mgraph, part, comm);
  GKfree((void *)&(graph.vwgt), LTERM);
  mpart    = idxsmalloc(mgraph.nvtxs, mype, "TestParMetis_V3: mpart");
  savepart = idxmalloc(mgraph.nvtxs, "TestParMetis_V3: savepart");

  MALLOC_CHECK(NULL);

  /*======================================================================
  / ParMETIS_V3_RefineKway
  /=======================================================================*/
  options[0] = 1;
  options[1] = 3;
  options[3] = COUPLED;
  nparts = npes;
  wgtflag = 0;
  numflag = 0;

  for (ncon=1; ncon<=5; ncon+=2) {
    for (opt2=1; opt2<=2; opt2++) {
      options[2] = opt2;

      sset(nparts*ncon, 1.0/(float)nparts, tpwgts);
      if (mype == 0)
        printf("\nTesting ParMETIS_V3_RefineKway with options[1-3] = {%d %d %d}, Ncon: %d, Nparts: %d\n",
            options[1], options[2], options[3], ncon, nparts);

      ParMETIS_V3_RefineKway(mgraph.vtxdist, mgraph.xadj, mgraph.adjncy,
          NULL, NULL, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec,
          options, &edgecut, mpart, &comm);

      if (mype == 0) {
        printf("ParMETIS_V3_RefineKway reported a cut of %d\n", edgecut);
      }
    }
  }

  /*======================================================================
  / ParMETIS_V3_AdaptiveRepart
  /=======================================================================*/
  mgraph.vwgt  = idxsmalloc(mgraph.nvtxs*5, 1, "TestParMetis_V3: mgraph.vwgt");
  mgraph.vsize = idxsmalloc(mgraph.nvtxs, 1, "TestParMetis_V3: mgraph.vsize");
  AdaptGraph(&mgraph, 4, comm);
  options[0] = 1;
  options[1] = 7;
  options[3] = COUPLED;
  wgtflag = 2;
  numflag = 0;

  for (nparts=2*npes; nparts>=npes/2; nparts = nparts/2) {

    ncon = 1;
    wgtflag = 0;
    options[0] = 0;
    sset(nparts*ncon, 1.0/(float)nparts, tpwgts);
    ParMETIS_V3_PartKway(mgraph.vtxdist, mgraph.xadj, mgraph.adjncy, NULL,
        NULL, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options,
        &edgecut, savepart, &comm);
    options[0] = 1;
    wgtflag = 2;

    for (ncon=1; ncon<=3; ncon+=2) {
      sset(nparts*ncon, 1.0/(float)nparts, tpwgts);

      if (ncon > 1)
        Mc_AdaptGraph(&mgraph, savepart, ncon, nparts, comm);
      else
        AdaptGraph(&mgraph, 4, comm);
        /* idxset(mgraph.nvtxs, 1, mgraph.vwgt); */

      for (ipc2redist=1000.0; ipc2redist>=0.001; ipc2redist/=1000.0) {
        for (opt2=1; opt2<=2; opt2++) {
          idxcopy(mgraph.nvtxs, savepart, mpart);
          options[2] = opt2;

          if (mype == 0)
            printf("\nTesting ParMETIS_V3_AdaptiveRepart with options[1-3] = {%d %d %d}, ipc2redist: %.3f, Ncon: %d, Nparts: %d\n",
                options[1], options[2], options[3], ipc2redist, ncon, nparts);

          ParMETIS_V3_AdaptiveRepart(mgraph.vtxdist, mgraph.xadj,
              mgraph.adjncy, mgraph.vwgt, mgraph.vsize, NULL, &wgtflag,
              &numflag, &ncon, &nparts, tpwgts, ubvec, &ipc2redist, options,
              &edgecut, mpart, &comm);

          if (mype == 0) {
            printf("ParMETIS_V3_AdaptiveRepart reported a cut of %d\n", edgecut);
          }
        }
      }
    }
  }

  free(mgraph.vwgt);
  free(mgraph.vsize);

  /*======================================================================
  / ParMETIS_V3_NodeND
  /=======================================================================*/
  sizes = idxmalloc(2*npes, "TestParMetis_V3: sizes");
  order = idxmalloc(graph.nvtxs, "TestParMetis_V3: order");

  options[0] = 1;
  options[PMV3_OPTION_DBGLVL] = 3;
  options[PMV3_OPTION_SEED] = 1;
  numflag = 0;

  for (opt2=1; opt2<=2; opt2++) {
    options[PMV3_OPTION_IPART] = opt2;

    if (mype == 0)
      printf("\nTesting ParMETIS_V3_NodeND with options[1-3] = {%d %d %d}\n",
          options[1], options[2], options[3]);

    ParMETIS_V3_NodeND(graph.vtxdist, graph.xadj, graph.adjncy, &numflag,
        options, order, sizes, &comm);
  }

  GKfree(&tpwgts, &part, &mpart, &savepart, &order, &sizes, LTERM);
}
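/***********************************************************************************
* Illustrative sketch (hypothetical helper, not part of the test suite): the
* load-imbalance injection the adaptive tests above rely on.  As in the inline
* loop in main's case 3, vertices currently assigned to part i receive weight
* adptf for constraint i and weight 1 everywhere else, so the adaptive
* repartitioner has real imbalance to correct.
************************************************************************************/
static void inject_imbalance(int nvtxs, int ncon, int adptf,
                             const int *part, int *vwgt)
{
  int i, j;

  for (i = 0; i < ncon; i++)
    for (j = 0; j < nvtxs; j++)
      vwgt[j*ncon + i] = (part[j] == i) ? adptf : 1;
}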