/* Average layer j's weights across the n replica networks and push the
 * averaged copy back to every replica.  nets[0]'s copy of the layer is
 * used as the accumulation buffer. */
void sync_layer(network *nets, int n, int j)
{
    //printf("Syncing layer %d\n", j);
    network first = nets[0];
    layer base = first.layers[j];

    /* Pull the reference copy of the weights from the first GPU. */
    cuda_set_device(first.gpu_index);
    pull_weights(base);

    /* Fold every other replica's weights into the reference copy. */
    for (int k = 1; k < n; ++k) {
        cuda_set_device(nets[k].gpu_index);
        layer replica = nets[k].layers[j];
        pull_weights(replica);
        merge_weights(replica, base);
    }

    /* Turn the accumulated sum into an average. */
    scale_weights(base, 1./n);

    /* Broadcast the averaged weights back to all replicas (including the
     * first, so every device ends up with the identical layer state). */
    for (int k = 0; k < n; ++k) {
        cuda_set_device(nets[k].gpu_index);
        distribute_weights(nets[k].layers[j], base);
    }
    //printf("Done syncing layer %d\n", j);
}
// Partition the graph held in `model` using ParMETIS and load the resulting
// part assignment into `solution`.
//
// Steps (all visible below):
//   1. Pull vertex/edge lists and weights from the graph model; convert
//      indices and weights to ParMETIS' index type (pm_idx_t) via TPL_Traits.
//   2. Build vtxdist and shrink the MPI communicator so ranks with zero
//      vertices are excluded (ParMETIS dislikes empty ranks).
//   3. Call ParMETIS_V3_PartKway or ParMETIS_V3_RefineKway.
//   4. Copy/cast the ParMETIS part array into the solution and free all
//      scratch copies.
//
// Fixes applied in review:
//   - pm_options[0] was set to 1 BEFORE the loop that zeroes the whole
//     options array, so it was immediately overwritten and the non-default
//     seed (pm_options[2] = 15) was never honored.  The assignments are now
//     ordered after the zeroing loop, matching the stated intent.
//   - A user-supplied imbalance_tolerance <= 1.0 is clamped to 1.01, since
//     ParMETIS requires ubvec > 1.0 (consistent with the warning-and-clamp
//     logic used elsewhere in Zoltan2's ParMETIS adapter).
void AlgParMETIS<Adapter>::partition(
  const RCP<PartitioningSolution<Adapter> > &solution
)
{
  HELLO;

  size_t numGlobalParts = solution->getTargetGlobalNumberOfParts();
  int np = problemComm->getSize();

  // Get vertex info
  ArrayView<const gno_t> vtxgnos;
  ArrayView<StridedData<lno_t, scalar_t> > vwgts;
  int nVwgt = model->getNumWeightsPerVertex();
  size_t nVtx = model->getVertexList(vtxgnos, vwgts);
  pm_idx_t pm_nVtx;
  TPL_Traits<pm_idx_t,size_t>::ASSIGN_TPL_T(pm_nVtx, nVtx);

  pm_idx_t *pm_vwgts = NULL;
  if (nVwgt) {
    pm_vwgts = new pm_idx_t[nVtx*nVwgt];
    scale_weights(nVtx, vwgts, pm_vwgts);
  }

  // Get edge info
  ArrayView<const gno_t> adjgnos;
  ArrayView<const lno_t> offsets;
  ArrayView<StridedData<lno_t, scalar_t> > ewgts;
  int nEwgt = model->getNumWeightsPerEdge();
  size_t nEdge = model->getEdgeList(adjgnos, offsets, ewgts);

  pm_idx_t *pm_ewgts = NULL;
  if (nEwgt) {
    pm_ewgts = new pm_idx_t[nEdge*nEwgt];
    scale_weights(nEdge, ewgts, pm_ewgts);
  }

  // Convert index types for edges, if needed
  pm_idx_t *pm_offsets;
  TPL_Traits<pm_idx_t,const lno_t>::ASSIGN_TPL_T_ARRAY(&pm_offsets, offsets);
  pm_idx_t *pm_adjs;
  pm_idx_t pm_dummy_adj;
  if (nEdge)
    TPL_Traits<pm_idx_t,const gno_t>::ASSIGN_TPL_T_ARRAY(&pm_adjs, adjgnos);
  else
    pm_adjs = &pm_dummy_adj;  // ParMETIS does not like NULL pm_adjs

  // Build vtxdist
  pm_idx_t *pm_vtxdist;
  ArrayView<size_t> vtxdist;
  model->getVertexDist(vtxdist);
  TPL_Traits<pm_idx_t,size_t>::ASSIGN_TPL_T_ARRAY(&pm_vtxdist, vtxdist);

  // ParMETIS does not like processors having no vertices.
  // Inspect vtxdist and remove from communicator procs that have no vertices.
  RCP<Comm<int> > subcomm;
  MPI_Comm mpicomm;  // Note:  mpicomm is valid only while subcomm is in scope

  if (np > 1) {
    int nKeep = 0;
    Array<int> keepRanks(np);
    // Compact vtxdist in place while collecting the ranks to keep.
    for (int i = 0; i < np; i++) {
      if ((pm_vtxdist[i+1] - pm_vtxdist[i]) > 0) {
        keepRanks[nKeep] = i;
        pm_vtxdist[nKeep] = pm_vtxdist[i];
        nKeep++;
      }
    }
    pm_vtxdist[nKeep] = pm_vtxdist[np];
    if (nKeep < np) {
      subcomm = problemComm->createSubcommunicator(keepRanks.view(0,nKeep));
      if (subcomm != Teuchos::null)
        mpicomm = Teuchos::getRawMpiComm(*subcomm);
      else
        mpicomm = MPI_COMM_NULL;  // this rank has no vertices; sits out
    }
    else {
      mpicomm = Teuchos::getRawMpiComm(*problemComm);
    }
  }
  else {
    mpicomm = Teuchos::getRawMpiComm(*problemComm);
  }

  // Create array for ParMETIS to return results in.
  pm_idx_t *pm_partList = NULL;
  if (nVtx) pm_partList = new pm_idx_t[nVtx];

  if (mpicomm != MPI_COMM_NULL) {
    // If in ParMETIS' communicator (i.e., have vertices), call ParMETIS

    // Get target part sizes
    pm_idx_t pm_nCon = (nVwgt == 0 ? 1 : pm_idx_t(nVwgt));
    pm_real_t *pm_partsizes = new pm_real_t[numGlobalParts*pm_nCon];
    for (pm_idx_t dim = 0; dim < pm_nCon; dim++) {
      if (!solution->criteriaHasUniformPartSizes(dim))
        for (size_t i=0; i<numGlobalParts; i++)
          pm_partsizes[i*pm_nCon+dim] =
            pm_real_t(solution->getCriteriaPartSize(dim,i));
      else
        for (size_t i=0; i<numGlobalParts; i++)
          pm_partsizes[i*pm_nCon+dim] = pm_real_t(1.)/pm_real_t(numGlobalParts);
    }

    // Get imbalance tolerances
    double tolerance = 1.1;
    const Teuchos::ParameterList &pl = env->getParameters();
    const Teuchos::ParameterEntry *pe = pl.getEntryPtr("imbalance_tolerance");
    if (pe) tolerance = pe->getValue<double>(&tolerance);

    // ParMETIS requires the imbalance tolerance (ubvec) to be > 1.0;
    // clamp and warn if the user supplied a value that violates this.
    if (tolerance <= 1.0) {
      if (problemComm->getRank() == 0)
        std::cerr << "Warning: ParMETIS requires imbalance_tolerance > 1.0; "
                  << "to comply, Zoltan2 reset imbalance_tolerance to 1.01."
                  << std::endl;
      tolerance = 1.01;
    }

    pm_real_t *pm_imbTols = new pm_real_t[pm_nCon];
    for (pm_idx_t dim = 0; dim < pm_nCon; dim++)
      pm_imbTols[dim] = pm_real_t(tolerance);

    std::string parmetis_method("PARTKWAY");
    pe = pl.getEntryPtr("partitioning_approach");
    if (pe){
      std::string approach;
      approach = pe->getValue<std::string>(&approach);
      if ((approach == "repartition") || (approach == "maximize_overlap"))
        parmetis_method = "REFINE_KWAY";
      // TODO: AdaptiveRepart
    }

    // Other ParMETIS parameters?
    pm_idx_t pm_wgtflag = 2*(nVwgt > 0) + (nEwgt > 0);
    pm_idx_t pm_numflag = 0;
    pm_idx_t pm_edgecut = -1;
    pm_idx_t pm_options[METIS_NOPTIONS];
    for (int i = 0; i < METIS_NOPTIONS; i++)
      pm_options[i] = 0;  // Default options
    // These must be set AFTER the zeroing loop above; previously
    // pm_options[0] was assigned first and immediately overwritten to 0,
    // so the options array was silently ignored by ParMETIS.
    pm_options[0] = 1;   // Use non-default options for some ParMETIS options
    pm_options[2] = 15;  // Matches default value used in Zoltan

    pm_idx_t pm_nPart;
    TPL_Traits<pm_idx_t,size_t>::ASSIGN_TPL_T(pm_nPart, numGlobalParts);

    if (parmetis_method == "PARTKWAY") {
      ParMETIS_V3_PartKway(pm_vtxdist, pm_offsets, pm_adjs, pm_vwgts,
                           pm_ewgts, &pm_wgtflag, &pm_numflag, &pm_nCon,
                           &pm_nPart, pm_partsizes, pm_imbTols, pm_options,
                           &pm_edgecut, pm_partList, &mpicomm);
    }
    else if (parmetis_method == "ADAPTIVE_REPART") {
      // Get object sizes:  pm_vsize
      std::cout << "NOT READY FOR ADAPTIVE_REPART YET; NEED VSIZE" << std::endl;
      exit(-1);
      //pm_real_t itr = 100.;  // Same default as in Zoltan
      //ParMETIS_V3_AdaptiveRepart(pm_vtxdist, pm_offsets, pm_adjs, pm_vwgts,
      //                           pm_vsize, pm_ewgts, &pm_wgtflag,
      //                           &pm_numflag, &pm_nCon, &pm_nPart,
      //                           pm_partsizes, pm_imbTols,
      //                           &itr, pm_options,
      //                           &pm_edgecut, pm_partList, &mpicomm);
    }
    else if (parmetis_method == "REFINE_KWAY") {
      ParMETIS_V3_RefineKway(pm_vtxdist, pm_offsets, pm_adjs, pm_vwgts,
                             pm_ewgts, &pm_wgtflag, &pm_numflag, &pm_nCon,
                             &pm_nPart, pm_partsizes, pm_imbTols, pm_options,
                             &pm_edgecut, pm_partList, &mpicomm);
    }

    // Clean up
    delete [] pm_partsizes;
    delete [] pm_imbTols;
  }

  // Load answer into the solution.
  ArrayRCP<part_t> partList;
  if (nVtx) {
    if (TPL_Traits<pm_idx_t, part_t>::OK_TO_CAST_TPL_T()) {
      // Same representation; hand ownership of pm_partList to the ArrayRCP.
      partList = ArrayRCP<part_t>((part_t *)pm_partList, 0, nVtx, true);
    }
    else {
      // TODO Probably should have a TPL_Traits function to do the following
      partList = ArrayRCP<part_t>(new part_t[nVtx], 0, nVtx, true);
      for (size_t i = 0; i < nVtx; i++) {
        partList[i] = part_t(pm_partList[i]);
      }
      delete [] pm_partList;
    }
  }

  solution->setParts(partList);
  env->memory("Zoltan2-ParMETIS: After creating solution");

  // Clean up copies made due to differing data sizes.
  TPL_Traits<pm_idx_t,size_t>::DELETE_TPL_T_ARRAY(&pm_vtxdist);
  TPL_Traits<pm_idx_t,const lno_t>::DELETE_TPL_T_ARRAY(&pm_offsets);
  if (nEdge) TPL_Traits<pm_idx_t,const gno_t>::DELETE_TPL_T_ARRAY(&pm_adjs);
  if (nVwgt) delete [] pm_vwgts;
  if (nEwgt) delete [] pm_ewgts;
}
// Partition the graph held in `model` using PT-Scotch and load the result
// into `solution`.
//
// MPI path: build a SCOTCH_Dgraph from the model's vertex/edge lists
// (converted to SCOTCH_Num via TPL_Traits), build a weighted complete-graph
// target architecture sized from the solution's part sizes, and call
// SCOTCH_dgraphMap.  When SCOTCH_Num and part_t have the same size, Scotch
// writes directly into the solution's memory; otherwise results are copied.
// Non-MPI path: placeholder that assigns every vertex to part 0.
//
// Fix applied in review: `me` was redeclared inside the
// SHOW_ZOLTAN2_SCOTCH_MEMORY block even though it is already declared at the
// top of the MPI branch — a guaranteed redefinition compile error whenever
// that macro is defined.  The redundant declaration is removed; the existing
// `me` is used.
void AlgPTScotch<Adapter>::partition(
  const RCP<PartitioningSolution<Adapter> > &solution
)
{
  HELLO;

  size_t numGlobalParts = solution->getTargetGlobalNumberOfParts();

  SCOTCH_Num partnbr=0;
  TPL_Traits<SCOTCH_Num, size_t>::ASSIGN_TPL_T(partnbr, numGlobalParts, env);

#ifdef HAVE_ZOLTAN2_MPI
  int ierr = 0;
  int me = problemComm->getRank();

  const SCOTCH_Num baseval = 0;  // Base value for array indexing.
                                 // GraphModel returns GNOs from base 0.

  SCOTCH_Strat stratstr;  // Strategy string
                          // TODO:  Set from parameters
  SCOTCH_stratInit(&stratstr);

  // Allocate and initialize PTScotch Graph data structure.
  // NOTE(review): mpicomm is not declared in this function — presumably a
  // class member; verify against the class definition.
  SCOTCH_Dgraph *gr = SCOTCH_dgraphAlloc();  // Scotch distributed graph
  ierr = SCOTCH_dgraphInit(gr, mpicomm);

  env->globalInputAssertion(__FILE__, __LINE__, "SCOTCH_dgraphInit",
                            !ierr, BASIC_ASSERTION, problemComm);

  // Get vertex info
  ArrayView<const gno_t> vtxID;
  ArrayView<StridedData<lno_t, scalar_t> > xyz;
  ArrayView<StridedData<lno_t, scalar_t> > vwgts;
  size_t nVtx = model->getVertexList(vtxID, xyz, vwgts);
  SCOTCH_Num vertlocnbr=0;
  TPL_Traits<SCOTCH_Num, size_t>::ASSIGN_TPL_T(vertlocnbr, nVtx, env);
  SCOTCH_Num vertlocmax = vertlocnbr;  // Assumes no holes in global nums.

  // Get edge info
  ArrayView<const gno_t> edgeIds;
  ArrayView<const int> procIds;
  ArrayView<const lno_t> offsets;
  ArrayView<StridedData<lno_t, scalar_t> > ewgts;

  size_t nEdge = model->getEdgeList(edgeIds, procIds, offsets, ewgts);

  SCOTCH_Num edgelocnbr=0;
  TPL_Traits<SCOTCH_Num, size_t>::ASSIGN_TPL_T(edgelocnbr, nEdge, env);
  const SCOTCH_Num edgelocsize = edgelocnbr;  // Assumes adj array is compact.

  SCOTCH_Num *vertloctab;  // starting adj/vtx
  TPL_Traits<SCOTCH_Num, lno_t>::ASSIGN_TPL_T_ARRAY(&vertloctab, offsets, env);

  SCOTCH_Num *edgeloctab;  // adjacencies
  TPL_Traits<SCOTCH_Num, gno_t>::ASSIGN_TPL_T_ARRAY(&edgeloctab, edgeIds, env);

  // We don't use these arrays, but we need them as arguments to Scotch.
  SCOTCH_Num *vendloctab = NULL;  // Assume consecutive storage for adj
  SCOTCH_Num *vlblloctab = NULL;  // Vertex label array
  SCOTCH_Num *edgegsttab = NULL;  // Array for ghost vertices

  // Get weight info.
  SCOTCH_Num *velotab = NULL;  // Vertex weights
  SCOTCH_Num *edlotab = NULL;  // Edge weights

  int nVwgts = model->getNumWeightsPerVertex();
  int nEwgts = model->getNumWeightsPerEdge();
  if (nVwgts > 1 && me == 0) {
    std::cerr << "Warning: NumWeightsPerVertex is " << nVwgts
              << " but Scotch allows only one weight. "
              << " Zoltan2 will use only the first weight per vertex."
              << std::endl;
  }
  if (nEwgts > 1 && me == 0) {
    std::cerr << "Warning: NumWeightsPerEdge is " << nEwgts
              << " but Scotch allows only one weight. "
              << " Zoltan2 will use only the first weight per edge."
              << std::endl;
  }

  if (nVwgts) {
    velotab = new SCOTCH_Num[nVtx+1];  // +1 since Scotch wants all procs
                                       // to have non-NULL arrays
    scale_weights(nVtx, vwgts[0], velotab);
  }
  if (nEwgts) {
    edlotab = new SCOTCH_Num[nEdge+1];  // +1 since Scotch wants all procs
                                        // to have non-NULL arrays
    scale_weights(nEdge, ewgts[0], edlotab);
  }

  // Build PTScotch distributed data structure
  ierr = SCOTCH_dgraphBuild(gr, baseval, vertlocnbr, vertlocmax,
                            vertloctab, vendloctab, velotab, vlblloctab,
                            edgelocnbr, edgelocsize,
                            edgeloctab, edgegsttab, edlotab);

  env->globalInputAssertion(__FILE__, __LINE__, "SCOTCH_dgraphBuild",
                            !ierr, BASIC_ASSERTION, problemComm);

  // Create array for Scotch to return results in.
  ArrayRCP<part_t> partList(new part_t[nVtx], 0, nVtx,true);
  SCOTCH_Num *partloctab = NULL;
  if (nVtx && (sizeof(SCOTCH_Num) == sizeof(part_t))) {
    // Can write directly into the solution's memory
    partloctab = (SCOTCH_Num *) partList.getRawPtr();
  }
  else {
    // Can't use solution memory directly; will have to copy later.
    // Note: Scotch does not like NULL arrays, so add 1 to always have non-null.
    // ParMETIS has this same "feature." See Zoltan bug 4299.
    partloctab = new SCOTCH_Num[nVtx+1];
  }

  // Get target part sizes
  float *partsizes = new float[numGlobalParts];
  if (!solution->criteriaHasUniformPartSizes(0))
    for (size_t i=0; i<numGlobalParts; i++)
      partsizes[i] = solution->getCriteriaPartSize(0, i);
  else
    for (size_t i=0; i<numGlobalParts; i++)
      partsizes[i] = 1.0 / float(numGlobalParts);

  // Allocate and initialize PTScotch target architecture data structure
  SCOTCH_Arch archdat;
  SCOTCH_archInit(&archdat);

  SCOTCH_Num velosum = 0;
  SCOTCH_dgraphSize (gr, &velosum, NULL, NULL, NULL);
  SCOTCH_Num *goalsizes = new SCOTCH_Num[partnbr];
  // TODO: The goalsizes are set as in Zoltan; not sure it is correct there
  // or here.
  // It appears velosum is global NUMBER of vertices, not global total
  // vertex weight. I think we should use the latter.
  // Fix this when we add vertex weights.
  for (SCOTCH_Num i = 0; i < partnbr; i++)
    goalsizes[i] = SCOTCH_Num(ceil(velosum * partsizes[i]));
  delete [] partsizes;

  SCOTCH_archCmpltw(&archdat, partnbr, goalsizes);

  // Call partitioning; result returned in partloctab.
  ierr = SCOTCH_dgraphMap(gr, &archdat, &stratstr, partloctab);

  env->globalInputAssertion(__FILE__, __LINE__, "SCOTCH_dgraphMap",
                            !ierr, BASIC_ASSERTION, problemComm);

  SCOTCH_archExit(&archdat);
  delete [] goalsizes;

  // TODO - metrics

  // (The redundant `int me = env->comm_->getRank();` previously declared
  // here under SHOW_ZOLTAN2_SCOTCH_MEMORY has been removed; `me` from the
  // top of this branch is used instead.)
#ifdef HAVE_SCOTCH_ZOLTAN2_GETMEMORYMAX
  if (me == 0){
    size_t scotchBytes = SCOTCH_getMemoryMax();
    std::cout << "Rank " << me << ": Maximum bytes used by Scotch: ";
    std::cout << scotchBytes << std::endl;
  }
#endif

  // Clean up PTScotch
  SCOTCH_dgraphExit(gr);
  free(gr);
  SCOTCH_stratExit(&stratstr);

  // Load answer into the solution.
  // (When sizes match and nVtx > 0, Scotch already wrote into partList.)
  if ((sizeof(SCOTCH_Num) != sizeof(part_t)) || (nVtx == 0)) {
    for (size_t i = 0; i < nVtx; i++) partList[i] = partloctab[i];
    delete [] partloctab;
  }

  solution->setParts(partList);

  env->memory("Zoltan2-Scotch: After creating solution");

  // Clean up copies made due to differing data sizes.
  TPL_Traits<SCOTCH_Num, lno_t>::DELETE_TPL_T_ARRAY(&vertloctab);
  TPL_Traits<SCOTCH_Num, gno_t>::DELETE_TPL_T_ARRAY(&edgeloctab);
  if (nVwgts) delete [] velotab;
  if (nEwgts) delete [] edlotab;

#else // DO NOT HAVE_MPI

  // TODO: Handle serial case with calls to Scotch.
  // TODO: For now, assign everything to rank 0 and assume only one part.
  // TODO: Can probably use the code above for loading solution,
  // TODO: instead of duplicating it here.
  // TODO
  // TODO: Actual logic should call Scotch when number of processes == 1.
  ArrayView<const gno_t> vtxID;
  ArrayView<StridedData<lno_t, scalar_t> > xyz;
  ArrayView<StridedData<lno_t, scalar_t> > vwgts;
  size_t nVtx = model->getVertexList(vtxID, xyz, vwgts);

  ArrayRCP<part_t> partList(new part_t[nVtx], 0, nVtx, true);
  for (size_t i = 0; i < nVtx; i++) partList[i] = 0;

  solution->setParts(partList);

#endif // DO NOT HAVE_MPI
}
// Partition the graph held in `model` using ParMETIS and load the resulting
// part assignment into `solution`.
//
// This variant additionally: clamps imbalance_tolerance to > 1.0 (ParMETIS
// requirement), supports ADAPTIVE_REPART when np > 1 (with placeholder unit
// object sizes), captures ParMETIS' return code, and throws
// std::runtime_error if ParMETIS reported failure.
//
// Fix applied in review: pm_options[0] was set to 1 BEFORE the loop that
// zeroes the whole options array, so it was immediately overwritten and the
// non-default seed (pm_options[2] = 15) was never honored.  The assignments
// are now ordered after the zeroing loop, matching the stated intent.
void AlgParMETIS<Adapter>::partition(
  const RCP<PartitioningSolution<Adapter> > &solution
)
{
  HELLO;

  size_t numGlobalParts = solution->getTargetGlobalNumberOfParts();
  int me = problemComm->getRank();
  int np = problemComm->getSize();

  // Get vertex info
  ArrayView<const gno_t> vtxgnos;
  ArrayView<StridedData<lno_t, scalar_t> > vwgts;
  int nVwgt = model->getNumWeightsPerVertex();
  size_t nVtx = model->getVertexList(vtxgnos, vwgts);
  pm_idx_t pm_nVtx;
  TPL_Traits<pm_idx_t,size_t>::ASSIGN(pm_nVtx, nVtx);

  pm_idx_t *pm_vwgts = NULL;
  if (nVwgt) {
    pm_vwgts = new pm_idx_t[nVtx*nVwgt];
    scale_weights(nVtx, vwgts, pm_vwgts);
  }

  // Get edge info
  ArrayView<const gno_t> adjgnos;
  ArrayView<const lno_t> offsets;
  ArrayView<StridedData<lno_t, scalar_t> > ewgts;
  int nEwgt = model->getNumWeightsPerEdge();
  size_t nEdge = model->getEdgeList(adjgnos, offsets, ewgts);

  pm_idx_t *pm_ewgts = NULL;
  if (nEwgt) {
    pm_ewgts = new pm_idx_t[nEdge*nEwgt];
    scale_weights(nEdge, ewgts, pm_ewgts);
  }

  // Convert index types for edges, if needed
  pm_idx_t *pm_offsets;
  TPL_Traits<pm_idx_t,const lno_t>::ASSIGN_ARRAY(&pm_offsets, offsets);
  pm_idx_t *pm_adjs;
  pm_idx_t pm_dummy_adj;
  if (nEdge)
    TPL_Traits<pm_idx_t,const gno_t>::ASSIGN_ARRAY(&pm_adjs, adjgnos);
  else
    pm_adjs = &pm_dummy_adj;  // ParMETIS does not like NULL pm_adjs

  // Build vtxdist
  pm_idx_t *pm_vtxdist;
  ArrayView<size_t> vtxdist;
  model->getVertexDist(vtxdist);
  TPL_Traits<pm_idx_t,size_t>::ASSIGN_ARRAY(&pm_vtxdist, vtxdist);

  // ParMETIS does not like processors having no vertices.
  // Inspect vtxdist and remove from communicator procs that have no vertices.
  RCP<Comm<int> > subcomm;
  MPI_Comm mpicomm;  // Note:  mpicomm is valid only while subcomm is in scope

  if (np > 1) {
    int nKeep = 0;
    Array<int> keepRanks(np);
    // Compact vtxdist in place while collecting the ranks to keep.
    for (int i = 0; i < np; i++) {
      if ((pm_vtxdist[i+1] - pm_vtxdist[i]) > 0) {
        keepRanks[nKeep] = i;
        pm_vtxdist[nKeep] = pm_vtxdist[i];
        nKeep++;
      }
    }
    pm_vtxdist[nKeep] = pm_vtxdist[np];
    if (nKeep < np) {
      subcomm = problemComm->createSubcommunicator(keepRanks.view(0,nKeep));
      if (subcomm != Teuchos::null)
        mpicomm = Teuchos::getRawMpiComm(*subcomm);
      else
        mpicomm = MPI_COMM_NULL;  // this rank has no vertices; sits out
    }
    else {
      mpicomm = Teuchos::getRawMpiComm(*problemComm);
    }
  }
  else {
    mpicomm = Teuchos::getRawMpiComm(*problemComm);
  }

  // Create array for ParMETIS to return results in.
  pm_idx_t *pm_partList = NULL;
  if (nVtx) pm_partList = new pm_idx_t[nVtx];
  for (size_t i = 0; i < nVtx; i++) pm_partList[i] = 0;

  int pm_return = METIS_OK;

  if (mpicomm != MPI_COMM_NULL) {
    // If in ParMETIS' communicator (i.e., have vertices), call ParMETIS

    // Get target part sizes
    pm_idx_t pm_nCon = (nVwgt == 0 ? 1 : pm_idx_t(nVwgt));
    pm_real_t *pm_partsizes = new pm_real_t[numGlobalParts*pm_nCon];
    for (pm_idx_t dim = 0; dim < pm_nCon; dim++) {
      if (!solution->criteriaHasUniformPartSizes(dim))
        for (size_t i=0; i<numGlobalParts; i++)
          pm_partsizes[i*pm_nCon+dim] =
            pm_real_t(solution->getCriteriaPartSize(dim,i));
      else
        for (size_t i=0; i<numGlobalParts; i++)
          pm_partsizes[i*pm_nCon+dim] = pm_real_t(1.)/pm_real_t(numGlobalParts);
    }

    // Get imbalance tolerances
    double tolerance = 1.1;
    const Teuchos::ParameterList &pl = env->getParameters();
    const Teuchos::ParameterEntry *pe = pl.getEntryPtr("imbalance_tolerance");
    if (pe) tolerance = pe->getValue<double>(&tolerance);

    // ParMETIS requires tolerance to be greater than 1.0;
    // fudge it if condition is not met
    if (tolerance <= 1.0) {
      if (me == 0)
        std::cerr << "Warning: ParMETIS requires imbalance_tolerance > 1.0; "
                  << "to comply, Zoltan2 reset imbalance_tolerance to 1.01."
                  << std::endl;
      tolerance = 1.01;
    }

    pm_real_t *pm_imbTols = new pm_real_t[pm_nCon];
    for (pm_idx_t dim = 0; dim < pm_nCon; dim++)
      pm_imbTols[dim] = pm_real_t(tolerance);

    std::string parmetis_method("PARTKWAY");
    pe = pl.getEntryPtr("partitioning_approach");
    if (pe) {
      std::string approach;
      approach = pe->getValue<std::string>(&approach);
      if ((approach == "repartition") || (approach == "maximize_overlap")) {
        if (np > 1)
          // ParMETIS_V3_AdaptiveRepart requires two or more processors
          parmetis_method = "ADAPTIVE_REPART";
        else
          parmetis_method = "REFINE_KWAY";
      }
    }

    // Other ParMETIS parameters?
    pm_idx_t pm_wgtflag = 2*(nVwgt > 0) + (nEwgt > 0);
    pm_idx_t pm_numflag = 0;
    pm_idx_t pm_edgecut = -1;
    pm_idx_t pm_options[METIS_NOPTIONS];
    for (int i = 0; i < METIS_NOPTIONS; i++)
      pm_options[i] = 0;  // Default options
    // These must be set AFTER the zeroing loop above; previously
    // pm_options[0] was assigned first and immediately overwritten to 0,
    // so the options array was silently ignored by ParMETIS.
    pm_options[0] = 1;   // Use non-default options for some ParMETIS options
    pm_options[2] = 15;  // Matches default value used in Zoltan

    pm_idx_t pm_nPart;
    TPL_Traits<pm_idx_t,size_t>::ASSIGN(pm_nPart, numGlobalParts);

    if (parmetis_method == "PARTKWAY") {
      pm_return = ParMETIS_V3_PartKway(pm_vtxdist, pm_offsets, pm_adjs,
                                       pm_vwgts, pm_ewgts, &pm_wgtflag,
                                       &pm_numflag, &pm_nCon, &pm_nPart,
                                       pm_partsizes, pm_imbTols, pm_options,
                                       &pm_edgecut, pm_partList, &mpicomm);
    }
    else if (parmetis_method == "ADAPTIVE_REPART") {
      // Get object sizes: pm_vsize
      // TODO: get pm_vsize info from input adapter or graph model
      // TODO: This is just a placeholder
      pm_idx_t *pm_vsize = new pm_idx_t[nVtx];
      for (size_t i = 0; i < nVtx; i++) pm_vsize[i] = 1;

      pm_real_t itr = 100.;  // Same default as in Zoltan
      pm_return = ParMETIS_V3_AdaptiveRepart(pm_vtxdist, pm_offsets, pm_adjs,
                                             pm_vwgts, pm_vsize, pm_ewgts,
                                             &pm_wgtflag, &pm_numflag,
                                             &pm_nCon, &pm_nPart,
                                             pm_partsizes, pm_imbTols,
                                             &itr, pm_options, &pm_edgecut,
                                             pm_partList, &mpicomm);
      delete [] pm_vsize;
    }
    else if (parmetis_method == "REFINE_KWAY") {
      pm_return = ParMETIS_V3_RefineKway(pm_vtxdist, pm_offsets, pm_adjs,
                                         pm_vwgts, pm_ewgts, &pm_wgtflag,
                                         &pm_numflag, &pm_nCon, &pm_nPart,
                                         pm_partsizes, pm_imbTols, pm_options,
                                         &pm_edgecut, pm_partList, &mpicomm);
    }

    // Clean up
    delete [] pm_partsizes;
    delete [] pm_imbTols;
  }

  // Load answer into the solution.
  ArrayRCP<part_t> partList;
  if (nVtx)
    TPL_Traits<part_t, pm_idx_t>::SAVE_ARRAYRCP(&partList, pm_partList, nVtx);
  TPL_Traits<pm_idx_t, part_t>::DELETE_ARRAY(&pm_partList);

  solution->setParts(partList);

  env->memory("Zoltan2-ParMETIS: After creating solution");

  // Clean up copies made due to differing data sizes.
  TPL_Traits<pm_idx_t,size_t>::DELETE_ARRAY(&pm_vtxdist);
  TPL_Traits<pm_idx_t,const lno_t>::DELETE_ARRAY(&pm_offsets);
  if (nEdge) TPL_Traits<pm_idx_t,const gno_t>::DELETE_ARRAY(&pm_adjs);
  if (nVwgt) delete [] pm_vwgts;
  if (nEwgt) delete [] pm_ewgts;

  // Ranks excluded from the ParMETIS communicator keep pm_return == METIS_OK
  // and do not throw.
  if (pm_return != METIS_OK) {
    throw std::runtime_error(
      "\nParMETIS returned an error; no valid partition generated.\n"
      "Look for 'PARMETIS ERROR' in your output for more details.\n");
  }
}
/* inside_outside -- iterative (EM-style) re-estimation of grammar rule
 * weights from the yields in yieldfp, with optional annealing, jitter and
 * Variational Bayes updates.  Each pass computes expected rule counts via
 * expected_rule_counts(), resets rule weights from those counts, prunes
 * low-probability rules, and repeats until the rule count stabilizes and the
 * relative drop in -log P falls below stoptol (or maxits is reached).
 * Returns the final bits-per-token value: -log P / (log(2) * sum of yield
 * weights).
 *
 * Parameters (as used below):
 *   g                  grammar to re-estimate (mutated in place)
 *   si                 symbol table / string index passed through to helpers
 *   yieldfp            training yields (read by expected_rule_counts)
 *   tracefp, summaryfp optional output streams; writes are gated on
 *                      debuglevel thresholds (1000/5000/10000)
 *   maxsentlen         passed to expected_rule_counts (presumably a length
 *                      cutoff -- confirm in that function)
 *   minits, maxits     minimum/maximum number of iterations; maxits <= 0
 *                      means no iteration cap in the break condition
 *   stoptol            relative -logP improvement threshold for convergence
 *   minruleprob        pruning threshold for prune_grammar/write_grammar
 *   jitter             if nonzero, jitter_weights perturbs the weights
 *   VariationalBayes   flag forwarded to set_rule_weights
 *   wordscale          forwarded to expected_rule_counts
 *   annealstart/stop,  annealing schedule: geometric interpolation of the
 *   nanneal            temperature over the first nanneal iterations
 *   weighted_yields_flag  forwarded to expected_rule_counts
 */
FLOAT inside_outside(grammar g, const si_t si, FILE *yieldfp, FILE *tracefp,
                     FILE *summaryfp, int debuglevel, int maxsentlen,
                     int minits, int maxits, FLOAT stoptol, FLOAT minruleprob,
                     FLOAT jitter, int VariationalBayes, FLOAT wordscale,
                     FLOAT annealstart, FLOAT annealstop, int nanneal,
                     int weighted_yields_flag)
{
  FLOAT *rule_counts = CALLOC(g->nrules, sizeof(FLOAT));
  FLOAT sum_neglog_prob0;  /* -logP of previous iteration */
  FLOAT sum_neglog_prob;   /* -logP of current iteration */
  int iteration = 0;
  size_t nrules, nrules0;  /* rule counts of current/previous iteration */
  FLOAT sum_yieldweights;
  FLOAT temperature = annealstart;

  nrules = g->nrules;

  /* Iteration-0 header / stats line. */
  if (summaryfp && debuglevel >= 1000) {
    if (debuglevel < 5000)
      fprintf(summaryfp,
              "# Iteration\ttemperature\tnrules\t-logP\tbits/token\n%d\t%g\t%d",
              iteration, temperature, (int) nrules);
    else
      fprintf(summaryfp, "# Iteration %d, temperature = %g, %d rules, ",
              iteration, temperature, (int) nrules);
    fflush(summaryfp);
  }

  /* E-step for iteration 0: expected rule counts and total -logP. */
  sum_neglog_prob0 = expected_rule_counts(g, si, yieldfp, tracefp, summaryfp,
                                          debuglevel, maxsentlen, minruleprob,
                                          wordscale, rule_counts,
                                          &sum_yieldweights,
                                          weighted_yields_flag);

  if (summaryfp && debuglevel >= 1000) {
    if (debuglevel < 5000)
      fprintf(summaryfp, "\t%g\t%g\n", sum_neglog_prob0,
              sum_neglog_prob0/(log(2)*(sum_yieldweights)));
    else
      fprintf(summaryfp, "-logP = %g, bits/token = %g.\n", sum_neglog_prob0,
              sum_neglog_prob0/(log(2)*(sum_yieldweights)));
    fflush(summaryfp);
  }

  if (tracefp && debuglevel >= 10000) {
    write_rule_values(tracefp, g, si, rule_counts, 0);
    fprintf(tracefp, "\n");
    fflush(tracefp);
  }

  if (summaryfp && debuglevel >= 5000 && debuglevel < 10000)
    write_grammar(summaryfp, g, si, minruleprob);

  while (1) {
    ++iteration;

    /* M-step: fold in bias, set weights from counts, prune, optionally
     * jitter, then renormalize from the (possibly jittered) weights. */
    add_bias(g, rule_counts);
    set_rule_weights(g, rule_counts, VariationalBayes);
    prune_grammar(g, si, minruleprob);
    if (jitter != 0)
      jitter_weights(g, jitter);
    set_rule_weights(g, g->weights, 0);

    /* Annealing: geometric temperature schedule over the first nanneal
     * iterations; weights are scaled by 1/temperature. */
    if (iteration < nanneal) {
      temperature = annealstart*pow(annealstop/annealstart,
                                    (iteration-1.0)/(nanneal-1.0));
      scale_weights(g, 1.0/temperature);
    }
    else
      temperature = 1.0;

    nrules0 = nrules;
    nrules = g->nrules;

    if (summaryfp && debuglevel >= 1000) {
      if (debuglevel < 5000)
        fprintf(summaryfp, "%d\t%g\t%d", iteration, temperature, (int) nrules);
      else
        fprintf(summaryfp, "# Iteration %d, temperature %g, %d rules, ",
                iteration, temperature, (int) nrules);
      fflush(summaryfp);
    }

    /* E-step for this iteration. */
    sum_neglog_prob = expected_rule_counts(g, si, yieldfp, tracefp, summaryfp,
                                           debuglevel, maxsentlen, minruleprob,
                                           wordscale, rule_counts,
                                           &sum_yieldweights,
                                           weighted_yields_flag);

    if (summaryfp && debuglevel >= 1000) {
      if (debuglevel < 5000)
        fprintf(summaryfp, "\t%g\t%g\n", sum_neglog_prob,
                sum_neglog_prob/(log(2)*(sum_yieldweights)));
      else
        fprintf(summaryfp, "-logP = %g, bits/token = %g.\n", sum_neglog_prob,
                sum_neglog_prob/(log(2)*(sum_yieldweights)));
      fflush(summaryfp);
    }

    if (tracefp && debuglevel >= 10000) {
      write_rule_values(tracefp, g, si, rule_counts, 0);
      fprintf(tracefp, "\n");
      fflush(tracefp);
    }

    if (summaryfp && debuglevel >= 5000 && debuglevel < 10000)
      write_grammar(summaryfp, g, si, minruleprob);

    /* Stop only when the rule set is stable across consecutive iterations,
     * at least minits iterations have run, and either the maxits cap is hit
     * or the relative -logP improvement drops below stoptol.
     * NOTE(review): even when iteration >= maxits, the loop continues until
     * nrules stabilizes (nrules == nrules0) -- confirm that is intended. */
    if (nrules==nrules0 && iteration >= minits
        && ((maxits > 0 && iteration >= maxits)
            || (sum_neglog_prob0-sum_neglog_prob)/fabs(sum_neglog_prob)
               < stoptol))
      break;

    sum_neglog_prob0 = sum_neglog_prob;
  }

  FREE(rule_counts);
  /* bits per token of the final model */
  return(sum_neglog_prob/(log(2)*sum_yieldweights));
}
// Partition the graph held in `model` using ParMETIS and load the resulting
// part assignment into `solution`.
//
// This (earlier) variant: builds vtxdist with a gatherAll + prefix sum
// instead of asking the model, always over-allocates pm_partList by one
// (Zoltan bug 4299 workaround), hard-codes the imbalance tolerance to 1.05,
// and passes pm_options[0] == 0 so ParMETIS uses its default options.
//
// NOTE(review): `mpicomm` is used in the ParMETIS_V3_PartKway call but is
// not declared in this function -- presumably a class member; verify against
// the class definition.
void AlgParMETIS<Adapter>::partition(
  const RCP<PartitioningSolution<Adapter> > &solution
)
{
  HELLO;

  size_t numGlobalParts = solution->getTargetGlobalNumberOfParts();
  int np = problemComm->getSize();

  // Get vertex info
  ArrayView<const gno_t> vtxgnos;
  ArrayView<StridedData<lno_t, scalar_t> > xyz;
  ArrayView<StridedData<lno_t, scalar_t> > vwgts;
  int nVwgt = model->getNumWeightsPerVertex();
  size_t nVtx = model->getVertexList(vtxgnos, xyz, vwgts);
  pm_idx_t pm_nVtx;
  TPL_Traits<pm_idx_t,size_t>::ASSIGN_TPL_T(pm_nVtx, nVtx, env);

  pm_idx_t *pm_vwgts = NULL;
  if (nVwgt) {
    pm_vwgts = new pm_idx_t[nVtx*nVwgt];
    scale_weights(nVtx, vwgts, pm_vwgts);
  }

  // Get edge info
  ArrayView<const gno_t> adjgnos;
  ArrayView<const int> procs;
  ArrayView<const lno_t> offsets;
  ArrayView<StridedData<lno_t, scalar_t> > ewgts;
  int nEwgt = model->getNumWeightsPerEdge();
  size_t nEdge = model->getEdgeList(adjgnos, procs, offsets, ewgts);

  pm_idx_t *pm_ewgts = NULL;
  if (nEwgt) {
    pm_ewgts = new pm_idx_t[nEdge*nEwgt];
    scale_weights(nEdge, ewgts, pm_ewgts);
  }

  // Convert index types for edges, if needed
  pm_idx_t *pm_offsets;
  TPL_Traits<pm_idx_t,lno_t>::ASSIGN_TPL_T_ARRAY(&pm_offsets, offsets, env);
  pm_idx_t *pm_adjs;
  TPL_Traits<pm_idx_t,gno_t>::ASSIGN_TPL_T_ARRAY(&pm_adjs, adjgnos, env);

  // Build vtxdist: gather each rank's vertex count, then prefix-sum so
  // pm_vtxdist[i] is the first global vertex owned by rank i.
  pm_idx_t *pm_vtxdist = new pm_idx_t[np+1];
  pm_vtxdist[0] = 0;
  Teuchos::gatherAll(*problemComm, 1, &pm_nVtx, np, &(pm_vtxdist[1]));
  for (int i = 2; i <= np; i++)
    pm_vtxdist[i] += pm_vtxdist[i-1];

  // Create array for ParMETIS to return results in.
  // Note: ParMETIS does not like NULL arrays,
  // so add 1 to always have non-null.
  // See Zoltan bug 4299.
  pm_idx_t *pm_partList = new pm_idx_t[nVtx+1];

  // Get target part sizes and imbalance tolerances
  pm_idx_t pm_nCon = (nVwgt == 0 ? 1 : pm_idx_t(nVwgt));
  pm_real_t *pm_partsizes = new pm_real_t[numGlobalParts*pm_nCon];
  for (pm_idx_t dim = 0; dim < pm_nCon; dim++) {
    if (!solution->criteriaHasUniformPartSizes(dim))
      for (size_t i=0; i<numGlobalParts; i++)
        pm_partsizes[i*pm_nCon+dim] =
          pm_real_t(solution->getCriteriaPartSize(dim,i));
    else
      for (size_t i=0; i<numGlobalParts; i++)
        pm_partsizes[i*pm_nCon+dim] = pm_real_t(1.) / pm_real_t(numGlobalParts);
  }

  pm_real_t *pm_imbTols = new pm_real_t[pm_nCon];
  for (pm_idx_t dim = 0; dim < pm_nCon; dim++)
    pm_imbTols[dim] = 1.05;  // TODO: GET THE PARAMETER

  std::string parmetis_method("PARTKWAY");
  pm_idx_t pm_wgtflag = 2*(nVwgt > 0) + (nEwgt > 0);
  pm_idx_t pm_numflag = 0;
  pm_idx_t pm_nPart;
  TPL_Traits<pm_idx_t,size_t>::ASSIGN_TPL_T(pm_nPart, numGlobalParts, env);

  if (parmetis_method == "PARTKWAY") {
    pm_idx_t pm_edgecut = -1;
    pm_idx_t pm_options[3];
    pm_options[0] = 0;  // Use default options
    pm_options[1] = 0;  // Debug level (ignored if pm_options[0] == 0)
    pm_options[2] = 0;  // Seed (ignored if pm_options[0] == 0)
    ParMETIS_V3_PartKway(pm_vtxdist, pm_offsets, pm_adjs, pm_vwgts, pm_ewgts,
                         &pm_wgtflag, &pm_numflag, &pm_nCon, &pm_nPart,
                         pm_partsizes, pm_imbTols, pm_options, &pm_edgecut,
                         pm_partList, &mpicomm);
  }
  else if (parmetis_method == "ADAPTIVE_REPART") {
    // Get object sizes
    std::cout << "NOT READY FOR ADAPTIVE_REPART YET" << std::endl;
    exit(-1);
  }
  else if (parmetis_method == "PART_GEOM") {
    // Get coordinate info, too.
    std::cout << "NOT READY FOR PART_GEOM YET" << std::endl;
    exit(-1);
  }

  // Clean up
  delete [] pm_vtxdist;
  delete [] pm_partsizes;
  delete [] pm_imbTols;

  // Load answer into the solution.
  ArrayRCP<part_t> partList;
  if (TPL_Traits<pm_idx_t, part_t>::OK_TO_CAST_TPL_T()) {
    // Same representation; hand ownership of pm_partList to the ArrayRCP.
    partList = ArrayRCP<part_t>((part_t *)pm_partList, 0, nVtx, true);
  }
  else {
    // TODO Probably should have a TPL_Traits function to do the following
    partList = ArrayRCP<part_t>(new part_t[nVtx], 0, nVtx, true);
    for (size_t i = 0; i < nVtx; i++) {
      partList[i] = part_t(pm_partList[i]);
    }
    delete [] pm_partList;
  }

  solution->setParts(partList);

  env->memory("Zoltan2-ParMETIS: After creating solution");

  // Clean up copies made due to differing data sizes.
  TPL_Traits<pm_idx_t,lno_t>::DELETE_TPL_T_ARRAY(&pm_offsets);
  TPL_Traits<pm_idx_t,gno_t>::DELETE_TPL_T_ARRAY(&pm_adjs);
  if (nVwgt) delete [] pm_vwgts;
  if (nEwgt) delete [] pm_ewgts;
}