/**************************************************************************
 * mexFunction: gateway routine for MATLAB interface.
 ***************************************************************************/
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    // Argument checking
    if (nrhs != 9)
        mexErrMsgIdAndTxt(FUNC_NAME, "Wrong input.");
    if (nlhs != 3)
        mexErrMsgIdAndTxt(FUNC_NAME, "Wrong output.");

    // Input and output variables
    idx_t ne = (idx_t) mxGetScalar(ne_in);
    idx_t nn = (idx_t) mxGetScalar(nn_in);
    idx_t *eptr;   GetIdxArray(eptr_in, &eptr);
    idx_t *eind;   GetIdxArray(eind_in, &eind);
    idx_t *vwgt;   GetIdxArray(vwgt_in, &vwgt);
    idx_t *vsize;  GetIdxArray(vsize_in, &vsize);
    idx_t nparts = (idx_t) mxGetScalar(nparts_in);
    real_t *tpwgts; GetRealArray(tpwgts_in, &tpwgts);
    idx_t options[METIS_NOPTIONS];
    GetOptions(options_in, options);
    idx_t objval;
    idx_t *epart = (idx_t*) mxCalloc(ne, sizeof(idx_t));
    idx_t *npart = (idx_t*) mxCalloc(nn, sizeof(idx_t));

    // Metis main function
    int info = METIS_PartMeshNodal(&ne, &nn, eptr, eind, vwgt, vsize,
                                   &nparts, tpwgts, options, &objval,
                                   epart, npart);
    CheckReturn(info, FUNC_NAME);

    // Output: mxCreateDoubleMatrix already allocates the real data buffer,
    // so the partition vectors are copied straight into it (the original
    // extra mxSetData(mxMalloc(...)) only orphaned that buffer).
    objval_out = mxCreateDoubleScalar((double) objval);

    epart_out = mxCreateDoubleMatrix(1, ne, mxREAL);
    double *epart_out_pr = mxGetPr(epart_out);
    for (idx_t i = 0; i < ne; i++)
        epart_out_pr[i] = (double) epart[i];

    npart_out = mxCreateDoubleMatrix(1, nn, mxREAL);
    double *npart_out_pr = mxGetPr(npart_out);
    for (idx_t i = 0; i < nn; i++)
        npart_out_pr[i] = (double) npart[i];
}
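
/*
 * The gateway above relies on helpers defined elsewhere in the same MEX source
 * (FUNC_NAME, the *_in/*_out macros, GetIdxArray, GetOptions, CheckReturn).
 * For reference, this is a minimal, standalone sketch of the METIS 5.x call it
 * wraps, on a two-triangle mesh. It assumes only <metis.h>; all names here are
 * illustrative and not part of the MEX file.
 */
#include <stdio.h>
#include <metis.h>

int partition_two_triangles(void)
{
    idx_t ne = 2, nn = 4, nparts = 2, objval;
    /* CSR-like element connectivity: eptr[e]..eptr[e+1] indexes into eind */
    idx_t eptr[] = {0, 3, 6};
    idx_t eind[] = {0, 1, 2,    /* triangle 0 */
                    1, 2, 3};   /* triangle 1 */
    idx_t epart[2], npart[4];

    int status = METIS_PartMeshNodal(&ne, &nn, eptr, eind,
                                     NULL, NULL,    /* vwgt, vsize */
                                     &nparts, NULL, /* tpwgts */
                                     NULL,          /* default options */
                                     &objval, epart, npart);
    if (status != METIS_OK)
        return 1;

    for (idx_t e = 0; e < ne; e++)
        printf("element %d -> part %d\n", (int) e, (int) epart[e]);
    for (idx_t n = 0; n < nn; n++)
        printf("node %d -> part %d\n", (int) n, (int) npart[n]);
    return 0;
}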
void metis_partmeshnodal__(int *ne, int *nn, idxtype *elmnts, int *etype,
                           int *numflag, int *nparts, int *edgecut,
                           idxtype *epart, idxtype *npart)
{
    METIS_PartMeshNodal(ne, nn, elmnts, etype, numflag, nparts, edgecut, epart, npart);
}

void METIS_PARTMESHNODAL(int *ne, int *nn, idxtype *elmnts, int *etype,
                         int *numflag, int *nparts, int *edgecut,
                         idxtype *epart, idxtype *npart)
{
    METIS_PartMeshNodal(ne, nn, elmnts, etype, numflag, nparts, edgecut, epart, npart);
}

void bridge_partMeshNodal(int *ne, int *nn, idxtype *elmnts, int *etype,
                          int *numflag, int *nparts, int *edgecut,
                          idxtype *epart, idxtype *npart)
{
    METIS_PartMeshNodal(ne, nn, elmnts, etype, numflag, nparts, edgecut, epart, npart);
}
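
/*
 * The three wrappers above forward the METIS 4.x signature: the caller passes
 * a flat element array, an element-type code and a numbering flag, and the
 * edge-cut comes back through a pointer, unlike the eptr/eind form of the 5.x
 * call used earlier. A hedged sketch of that older calling convention follows;
 * it assumes METIS 4.x headers (idxtype) and the element-type codes implied by
 * the etypestr table in the driver below (1=TRI, 2=TET, 3=HEX, 4=QUAD).
 */
#include <stdio.h>
#include <stdlib.h>
#include <metis.h>   /* METIS 4.x-style header assumed: idxtype, void API */

int partition_tri_mesh_v4(void)
{
    int ne = 2, nn = 4;
    int etype = 1;      /* 1 = TRI in the 4.x element-type encoding */
    int numflag = 0;    /* 0 = C-style numbering starting at 0 */
    int nparts = 2, edgecut = 0;
    idxtype elmnts[] = {0, 1, 2,  1, 2, 3};   /* ne * 3 node ids */
    idxtype *epart = malloc(ne * sizeof(idxtype));
    idxtype *npart = malloc(nn * sizeof(idxtype));

    METIS_PartMeshNodal(&ne, &nn, elmnts, &etype, &numflag,
                        &nparts, &edgecut, epart, npart);

    printf("edge-cut: %d\n", edgecut);
    free(epart);
    free(npart);
    return 0;
}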
/*
 * Assign loop iterations to tiles, carving partitions out of /seedLoop/ using
 * the METIS library.
 */
static int* metis(loop_t* seedLoop, int tileSize, map_list* meshMaps,
                  int* nCore, int* nExec, int* nNonExec, int nThreads)
{
  int i;
  int setCore = seedLoop->set->core;
  int setSize = seedLoop->set->size;

  // use the mesh description to find a suitable map for partitioning through METIS
  map_t* map = NULL;
  map_list::const_iterator it, end;
  for (it = meshMaps->begin(), end = meshMaps->end(); it != end; it++) {
    if (set_eq(seedLoop->set, (*it)->inSet) || set_eq(seedLoop->set, (*it)->outSet)) {
      map = *it;
      break;
    }
  }
  if (! map) {
    // unfortunate scenario: the user provided a mesh description, but the loop picked
    // as seed has an iteration space which is not part of the mesh description.
    // will have to revert to chunk partitioning
    return NULL;
  }

  // now partition through METIS:
  // ... mesh geometry
  int nElements = map->inSet->size;
  int nNodes = map->outSet->size;
  int nParts = nElements / tileSize;
  int arity = map->size / nElements;
  // ... data needed for partitioning
  int* indMap = new int[nElements];
  int* indNodesMap = new int[nNodes];
  int* adjncy = map->values;
  int* offsets = new int[nElements+1]();
  for (i = 1; i < nElements+1; i++) {
    offsets[i] = offsets[i-1] + arity;
  }
  // ... options
  int result, objval, ncon = 1;
  int options[METIS_NOPTIONS];
  METIS_SetDefaultOptions(options);
  options[METIS_OPTION_NUMBERING] = 0;
  options[METIS_OPTION_CONTIG] = 1;
  // ... do partition!
  result = (arity == 2) ?
    METIS_PartGraphKway (&nNodes, &ncon, offsets, adjncy, NULL, NULL, NULL,
                         &nParts, NULL, NULL, options, &objval, indMap) :
    METIS_PartMeshNodal (&nElements, &nNodes, offsets, adjncy, NULL, NULL,
                         &nParts, NULL, options, &objval, indMap, indNodesMap);
  ASSERT(result == METIS_OK, "Invalid METIS partitioning");

  // what's the target iteration set?
  if (set_eq(seedLoop->set, map->inSet)) {
    delete[] indNodesMap;
  }
  else {
    // note: must be set_eq(seedLoop->set, map->outSet)
    delete[] indMap;
    indMap = indNodesMap;
  }
  delete[] offsets;

  // restrict partitions to the core region
  std::fill (indMap + setCore, indMap + setSize, 0);
  std::set<int> partitions (indMap, indMap + setCore);

  // ensure the set of partition IDs is compact (i.e., if we have a partitioning
  //   0: {0,1,...}, 1: {4,5,...}, 2: {}, 3: {6,10,...}, ...
  // we instead want to have
  //   0: {0,1,...}, 1: {4,5,...}, 2: {6,10,...}, ...
  std::map<int, int> mapper;
  std::set<int>::const_iterator sIt, sEnd;
  for (i = 0, sIt = partitions.begin(), sEnd = partitions.end(); sIt != sEnd; sIt++, i++) {
    mapper[*sIt] = i;
  }
  for (i = 0; i < setCore; i++) {
    indMap[i] = mapper[indMap[i]];
  }
  *nCore = partitions.size();

  // partition the exec halo region
  chunk_halo (seedLoop, tileSize, *nCore - 1, indMap, nExec, nNonExec, nThreads);

  return indMap;
}
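
/*
 * The routine above uses std::set/std::map to renumber the surviving partition
 * IDs into a dense 0..nCore-1 range. For reference, this is a plain-C sketch of
 * that compaction step in isolation; names are illustrative and it assumes the
 * partition ids are non-negative and smaller than nParts.
 */
#include <stdlib.h>

/* Remap part[0..n) so the ids actually used become 0,1,2,... in increasing
 * order of the original id. Returns the number of distinct partitions. */
static int compact_partition_ids(int *part, int n, int nParts)
{
    int *newId = malloc(nParts * sizeof(int));
    int i, next = 0;

    for (i = 0; i < nParts; i++)
        newId[i] = -1;                 /* -1 marks "id not used" */

    /* first pass: mark which ids occur */
    for (i = 0; i < n; i++)
        if (newId[part[i]] == -1)
            newId[part[i]] = -2;       /* seen, not yet numbered */

    /* assign dense ids in increasing order of the original id,
     * mirroring iteration over the ordered std::set above */
    for (i = 0; i < nParts; i++)
        if (newId[i] == -2)
            newId[i] = next++;

    /* second pass: rewrite the map */
    for (i = 0; i < n; i++)
        part[i] = newId[part[i]];

    free(newId);
    return next;
}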
/*************************************************************************
 * Let the game begin
 **************************************************************************/
int main(int argc, char *argv[])
{
  int i, j, ne, nn, etype, numflag=0, nparts, edgecut;
  idxtype *elmnts, *epart, *npart;
  timer IOTmr, DUALTmr;
  char etypestr[4][5] = {"TRI", "TET", "HEX", "QUAD"};
  GraphType graph;

  if (argc != 3) {
    printf("Usage: %s <meshfile> <nparts>\n", argv[0]);
    exit(0);
  }

  nparts = atoi(argv[2]);
  if (nparts < 2) {
    printf("nparts must be greater than one.\n");
    exit(0);
  }

  cleartimer(IOTmr);
  cleartimer(DUALTmr);

  starttimer(IOTmr);
  elmnts = ReadMesh(argv[1], &ne, &nn, &etype);
  stoptimer(IOTmr);

  epart = idxmalloc(ne, "main: epart");
  npart = idxmalloc(nn, "main: npart");

  printf("**********************************************************************\n");
  printf("%s", METISTITLE);
  printf("Mesh Information ----------------------------------------------------\n");
  printf(" Name: %s, #Elements: %d, #Nodes: %d, Etype: %s\n\n", argv[1], ne, nn, etypestr[etype-1]);
  printf("Partitioning Nodal Graph... -----------------------------------------\n");

  starttimer(DUALTmr);
  METIS_PartMeshNodal(&ne, &nn, elmnts, &etype, &numflag, &nparts, &edgecut, epart, npart);
  stoptimer(DUALTmr);

  printf(" %d-way Edge-Cut: %7d, Balance: %5.2f\n", nparts, edgecut,
         ComputeElementBalance(ne, nparts, epart));

  starttimer(IOTmr);
  WriteMeshPartition(argv[1], nparts, ne, epart, nn, npart);
  stoptimer(IOTmr);

  printf("\nTiming Information --------------------------------------------------\n");
  printf(" I/O: \t\t %7.3f\n", gettimer(IOTmr));
  printf(" Partitioning: \t\t %7.3f\n", gettimer(DUALTmr));
  printf("**********************************************************************\n");

/*
  graph.nvtxs = ne;
  graph.xadj = idxmalloc(ne+1, "xadj");
  graph.vwgt = idxsmalloc(ne, 1, "vwgt");
  graph.adjncy = idxmalloc(10*ne, "adjncy");
  graph.adjwgt = idxsmalloc(10*ne, 1, "adjncy");

  METIS_MeshToDual(&ne, &nn, elmnts, &etype, &numflag, graph.xadj, graph.adjncy);

  ComputePartitionInfo(&graph, nparts, epart);

  GKfree(&graph.xadj, &graph.adjncy, &graph.vwgt, &graph.adjwgt, LTERM);
*/

  GKfree(&elmnts, &epart, &npart, LTERM);

  return 0;
}
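
/*
 * The driver above reports "Balance" via ComputeElementBalance. As a hedged
 * reference, this sketch computes the metric as it is usually defined: the
 * largest partition size divided by the ideal size ne/nparts (1.0 is perfect).
 * The real routine in METIS may differ in details; idxtype is assumed to come
 * from the 4.x header used by the surrounding code.
 */
#include <stdlib.h>
#include <metis.h>

static double element_balance(int ne, int nparts, const idxtype *epart)
{
    int *count = calloc(nparts, sizeof(int));
    int i, max = 0;

    for (i = 0; i < ne; i++)
        count[epart[i]]++;
    for (i = 0; i < nparts; i++)
        if (count[i] > max)
            max = count[i];

    free(count);
    return (double) max * nparts / ne;
}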
int initialization(char* file_in, char* part_type, int* nintci, int* nintcf, int* nextci, int* nextcf, int*** lcc, double** bs, double** be, double** bn, double** bw, double** bl, double** bh, double** bp, double** su, int* points_count, int*** points, int** elems, double** var, double** cgup, double** oc, double** cnorm, int** local_global_index, int** global_local_index, int* neighbors_count, int** send_count, int*** send_list, int** recv_count, int*** recv_list, int** epart, int** npart, int** objval, int* num_elems_local) { /********** START INITIALIZATION **********/ int i = 0; int j = 0; int num_elems_pro; // number of elements in each processor int my_rank, num_procs; /// Boundary coefficients for each volume cell (South, East, North, West, High, Low) double *bs_a, *be_a, *bn_a, *bw_a, *bl_a, *bh_a; double *bp_a; /// Pole coefficient double *su_a; /// Source values int** lcc_a; /// link cell-to-cell array - stores neighboring information int** lcc_b; MPI_Status status; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /// Get current process id MPI_Comm_size(MPI_COMM_WORLD, &num_procs); /// get number of processe // read-in the input file by one processor if ( my_rank == 0 ) { int f_status = read_binary_geo(file_in, &*nintci, &*nintcf, &*nextci, &*nextcf, &lcc_a, &bs_a, &be_a, &bn_a, &bw_a, &bl_a, &bh_a, &bp_a, &su_a, &*points_count, &*points, &*elems); if ( f_status != 0 ) return f_status; } // Send the common information to other processors MPI_Bcast(nintci, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(nintcf, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(nextci, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(nextcf, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(points_count, 1, MPI_INT, 0, MPI_COMM_WORLD); // local arrays and share parameters int num_elems = *nintcf - *nintci + 1; if (my_rank != 0) { *elems = (int*) calloc(sizeof(int), num_elems * 8); } MPI_Bcast(*elems, num_elems * 8, MPI_INT, 0, MPI_COMM_WORLD); int points_num = *points_count; int npro = num_elems / num_procs; int exter = *nextcf - *nextci + 1; int remain = 0; int *k = (int*) calloc(sizeof(int), num_procs); int *k_sum = (int*) calloc(sizeof(int), num_procs); int last_proc = num_procs - 1; if (my_rank == last_proc) { remain = num_elems % num_procs; } int local_array_size = npro + remain + exter; *local_global_index = (int*) calloc(sizeof(int), npro + remain + exter); *global_local_index = (int*) calloc(sizeof(int), num_elems); *bs = (double*) calloc(sizeof(double), (local_array_size)); *bn = (double*) calloc(sizeof(double), (local_array_size)); *bw = (double*) calloc(sizeof(double), (local_array_size)); *be = (double*) calloc(sizeof(double), (local_array_size)); *bl = (double*) calloc(sizeof(double), (local_array_size)); *bh = (double*) calloc(sizeof(double), (local_array_size)); *bp = (double*) calloc(sizeof(double), (local_array_size)); *su = (double*) calloc(sizeof(double), (local_array_size)); *var = (double*) calloc(sizeof(double), (local_array_size)); *cgup = (double*) calloc(sizeof(double), (local_array_size)); *oc = (double*) calloc(sizeof(double), (npro + remain)); *cnorm = (double*) calloc(sizeof(double), (npro + remain)); *lcc = (int**) calloc(sizeof(int*), (local_array_size)); for ( i = 0; i < local_array_size; i++ ) { (*lcc)[i] = (int *) calloc(sizeof(int), (6)); } int *data = (int *) calloc(sizeof(int), (num_elems*6)); lcc_b = (int **) calloc(sizeof(int*), (num_elems)); for (i = 0; i < num_elems; i++) { lcc_b[i] = &(data[6 * i]); } if ( my_rank == 0 ) { for ( i = 0; i< num_elems; i++ ) { for ( j = 0; j < 6; j++ ) { 
lcc_b[i][j] = lcc_a[i][j]; } } } MPI_Bcast(&(lcc_b[0][0]), num_elems*6, MPI_INT, 0, MPI_COMM_WORLD); // choose part type if (strcmp(part_type, "classical") == 0) { k[my_rank] = npro + remain; (num_elems_pro) = npro + remain; int p = 0; for (p = 0; p < num_procs; p++) { MPI_Bcast(&k[p], 1, MPI_INT, p, MPI_COMM_WORLD); } if (my_rank == 0) { for (i = 1; i < num_procs; i++) { k_sum[i] = k_sum[i-1] + k[i-1]; } } // ditribute all B* array MPI_Scatterv(bs_a, k, k_sum, MPI_DOUBLE, *bs, k[my_rank], MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatterv(bn_a, k, k_sum, MPI_DOUBLE, *bn, k[my_rank], MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatterv(bw_a, k, k_sum, MPI_DOUBLE, *bw, k[my_rank], MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatterv(be_a, k, k_sum, MPI_DOUBLE, *be, k[my_rank], MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatterv(bl_a, k, k_sum, MPI_DOUBLE, *bl, k[my_rank], MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatterv(bh_a, k, k_sum, MPI_DOUBLE, *bh, k[my_rank], MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatterv(bp_a, k, k_sum, MPI_DOUBLE, *bp, k[my_rank], MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatterv(su_a, k, k_sum, MPI_DOUBLE, *su, k[my_rank], MPI_DOUBLE, 0, MPI_COMM_WORLD); for (i = 0; i < num_elems_pro; i++) { (*local_global_index)[i] = my_rank * npro + i; for (j = 0; j < 6; j++) { (*lcc)[i][j] = lcc_b[my_rank*npro+i][j]; } } for (i = 0; i < num_elems_pro; i++) { if (i > npro) { (*global_local_index)[my_rank*npro+i] = (my_rank*npro+i) % npro + npro; } else { (*global_local_index)[my_rank*npro+i] = (my_rank*npro+i) % npro; } } // part type is not classics but metis } else { *epart = (int*) calloc(sizeof(int), num_elems); *npart = (int*) calloc(sizeof(int), num_elems*8); // if ( my_rank == 0 ) { // parametes and array for metis partition libary idx_t ne = (idx_t) num_elems; idx_t nn = (idx_t) points_num; idx_t ncommon = 4; idx_t nparts = num_procs; int node_num = ne * 8; idx_t *eptr = (idx_t*) calloc(sizeof(idx_t), num_elems + 1); idx_t *eind = (idx_t*) calloc(sizeof(idx_t), node_num); idx_t objval_METIS; idx_t *epart_METIS = (idx_t*) calloc(sizeof(idx_t), num_elems); idx_t *npart_METIS = (idx_t*) calloc(sizeof(idx_t), node_num); int metis_final; for (i = (*nintci); i <= (*nintcf + 1) ; i++) { eptr[i] = (idx_t) i * 8; } for (i = 0; i < node_num; i++) { eind[i] = (idx_t) (*elems)[i]; } if (strcmp(part_type, "dual") == 0) { metis_final = METIS_PartMeshDual(&ne, &nn, eptr, eind, NULL, NULL, &ncommon, &nparts, NULL, NULL, &objval_METIS, epart_METIS, npart_METIS); } else if (strcmp(part_type, "noda") == 0) { metis_final = METIS_PartMeshNodal(&ne, &nn, eptr, eind, NULL, NULL, &nparts, NULL, NULL, &objval_METIS, epart_METIS, npart_METIS); } if (metis_final != METIS_OK) { printf("Metis part fails\n"); } (*objval) = (int*) calloc(sizeof(int), 1); (*objval)[0] = (int) objval_METIS; for (i = 0; i < num_elems; i++) { (*epart)[i] = (int) epart_METIS[i]; } for (i = 0; i < node_num; i++) { (*npart)[i] = (int) npart_METIS[i]; } // ditribute data according to METIS Partition int p = 0; // store local to global mapping for (p = 0; p < num_procs; p++) { if (my_rank == p) { for (j = 0; j < num_elems; j++) { if ((*epart)[j] == my_rank) { (*local_global_index)[k[my_rank]] = j; for (i = 0; i < 6; i++) { (*lcc)[k[my_rank]][i] = lcc_b[j][i]; } (*global_local_index)[j] = k[my_rank]; k[my_rank] = k[my_rank] + 1; } } } MPI_Bcast(&k[p], 1, MPI_INT, p, MPI_COMM_WORLD); /// send k[p] to other processors } /// finish storing local to global mapping (num_elems_pro) = k[my_rank]; int *local_global_index_sum = (int*) calloc(sizeof(int), num_elems); if (my_rank 
== 0) { for (i = 1; i < num_procs; i++) { k_sum[i] = k_sum[i-1] + k[i-1]; } } MPI_Gatherv(*local_global_index, k[my_rank], MPI_INT, local_global_index_sum, k, k_sum, MPI_INT, 0, MPI_COMM_WORLD); // copy B* array into new array accoring to order from metis partition double *bs_b = (double*) calloc(sizeof(double), (num_elems)); double *bn_b = (double*) calloc(sizeof(double), (num_elems)); double *bw_b = (double*) calloc(sizeof(double), (num_elems)); double *be_b = (double*) calloc(sizeof(double), (num_elems)); double *bl_b = (double*) calloc(sizeof(double), (num_elems)); double *bh_b = (double*) calloc(sizeof(double), (num_elems)); double *bp_b = (double*) calloc(sizeof(double), (num_elems)); double *su_b = (double*) calloc(sizeof(double), (num_elems)); if (my_rank == 0) { for (i= 0; i < num_elems; i++) { j = local_global_index_sum[i]; bs_b[i] = bs_a[j]; bn_b[i] = bn_a[j]; bw_b[i] = bw_a[j]; be_b[i] = be_a[j]; bl_b[i] = bl_a[j]; bh_b[i] = bh_a[j]; bp_b[i] = bp_a[j]; su_b[i] = su_a[j]; } } MPI_Scatterv(bs_b, k, k_sum , MPI_DOUBLE, *bs, k[my_rank], MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatterv(bn_b, k, k_sum , MPI_DOUBLE, *bn, k[my_rank], MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatterv(bw_b, k, k_sum , MPI_DOUBLE, *bw, k[my_rank], MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatterv(be_b, k, k_sum , MPI_DOUBLE, *be, k[my_rank], MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatterv(bl_b, k, k_sum , MPI_DOUBLE, *bl, k[my_rank], MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatterv(bh_b, k, k_sum , MPI_DOUBLE, *bh, k[my_rank], MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatterv(bp_b, k, k_sum , MPI_DOUBLE, *bp, k[my_rank], MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatterv(su_b, k, k_sum , MPI_DOUBLE, *su, k[my_rank], MPI_DOUBLE, 0, MPI_COMM_WORLD); free(bp_b); free(bh_b); free(bl_b); free(bw_b); free(bn_b); free(be_b); free(bs_b); free(su_b); free(local_global_index_sum); } // finish choose part type section and all local array are stored // initialization computational array for (i = 0; i <= 10; i++) { (*oc)[i] = 0.0; (*cnorm)[i] = 1.0; } for (i = 0; i < num_elems_pro; i++) { (*cgup)[i] = 0.0; (*var)[i] = 0.0; } for (i = num_elems_pro; i < local_array_size; i++) { (*var)[i] = 0.0; (*cgup)[i] = 0.0; (*bs)[i] = 0.0; (*be)[i] = 0.0; (*bn)[i] = 0.0; (*bw)[i] = 0.0; (*bl)[i] = 0.0; (*bh)[i] = 0.0; } for (i = 0; i < num_elems_pro; i++) { (*cgup)[i] = 1.0 / ((*bp)[i]); } // ************Comunication List********* // *num_elems_local = num_elems_pro; *neighbors_count = num_procs-1; *send_count = (int*) calloc(sizeof(int), (num_procs)); *recv_count = (int*) calloc(sizeof(int), (num_procs)); *send_list = (int **) calloc(sizeof(int*), (*neighbors_count + 1)); for (i = 0; i < *neighbors_count + 1; i++) { (*send_list)[i] = (int *) calloc(sizeof(int), (6 * num_elems_pro)); } int num_elems_global = 0; int* rank = (int*) calloc(sizeof(int), (num_procs)); int m = 0; int** count_time_local = (int**) calloc(sizeof(int*), (num_procs)); for (i = 0; i < num_procs; i++) { count_time_local[i] = (int *) calloc(sizeof(int), (num_elems)); } int* count_time = (int*) calloc(sizeof(int), (num_elems)); for (i = 0; i < num_elems_pro; i++) { for (j = 0; j < 6; j++) { num_elems_global = (*lcc)[i][j]; // choose only ghost cell if (num_elems_global < num_elems) { // choose part type if (strcmp(part_type, "classical") == 0) { if (num_elems_global >= npro * num_procs) { rank[my_rank]= num_elems_global / npro - 1; } else { rank[my_rank]= num_elems_global / npro; } } else { rank[my_rank]=(*epart)[num_elems_global]; } /// end choosing part type // record times of this elems 
occur if (rank[my_rank] != my_rank) { count_time_local[rank[my_rank]][i] = count_time_local[rank[my_rank]][i] + 1; if (count_time_local[rank[my_rank]][i] == 1) { (*send_list)[rank[my_rank]][(*send_count)[rank[my_rank]]] = (*local_global_index)[i]; (*send_count)[rank[my_rank]]=(*send_count)[rank[my_rank]] + 1; } } } /// choose ghost cell } /// end j for loop } /// end i for loop // Set the order in send_list and recv_list same for (i = 0; i < num_procs; i++) { MPI_Sendrecv(&((*send_count)[i]), 1, MPI_INT, i, i * 1000, &((*recv_count)[i]), 1, MPI_INT, i, my_rank * 1000, MPI_COMM_WORLD, &status); } *recv_list = (int **) calloc(sizeof(int*), (*neighbors_count+1)); for (i = 0; i < *neighbors_count+1; i++) { (*recv_list)[i] = (int *) calloc(sizeof(int), ((*recv_count)[i])); } for (i = 0; i < num_procs; i++) { if (my_rank == i) { for (j = 0; j < num_procs; j++) { if (j != my_rank) { MPI_Send((*send_list)[j], (*send_count)[j], MPI_INT, j, 100, MPI_COMM_WORLD); } } } else { MPI_Recv((*recv_list)[i], (*recv_count)[i], MPI_INT, i, 100, MPI_COMM_WORLD, &status); } } free(lcc_b); free(count_time_local); return 0; }
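
/*
 * The METIS branch of initialization() above derives the local-to-global and
 * global-to-local cell maps by scanning epart once per rank. This is a
 * condensed, hedged sketch of that bookkeeping in isolation; names are
 * illustrative, and epart[] is assumed to hold one rank id per global element,
 * as produced by METIS_PartMeshDual/Nodal in the code above.
 */

/* Collect the global indices owned by my_rank and fill both index maps.
 * Returns the number of local elements. */
static int build_index_maps(const int *epart, int num_elems, int my_rank,
                            int *local_global_index, int *global_local_index)
{
    int g, local = 0;
    for (g = 0; g < num_elems; g++) {
        if (epart[g] == my_rank) {
            local_global_index[local] = g;   /* local -> global */
            global_local_index[g] = local;   /* global -> local */
            local++;
        }
    }
    return local;
}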
int initialization_metis(char* file_in, char* part_type, int* nintci, int* nintcf, int* nextci, int* nextcf, int*** lcc, double** bs, double** be, double** bn, double** bw, double** bl, double** bh, double** bp, double** su, int* points_count, int*** points, int** elems, double** var, double** cgup, double** oc, double** cnorm, int** local_global_index, int** global_local_index, int* neighbors_count, int** send_count, int*** send_list, int** recv_count, int*** recv_list, int** epart, int** npart, int* objval) { int i = 0, j = 0; int my_rank, num_procs, CHUNKSIZE, CHUNKSIZE_TOT; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /// get current process id MPI_Comm_size(MPI_COMM_WORLD, &num_procs); // define CHUNKSIZE printf("nodal or dual\n"); int CHUNKSIZE0 = (int)ceil((double)(*nintcf - *nintci + 1 ) / (double) num_procs); int REMAINDER = ( *nintcf - *nintci + 1 ) - ( num_procs - 1 ) * CHUNKSIZE0; if (my_rank == num_procs -1) { CHUNKSIZE = REMAINDER; } else { CHUNKSIZE = CHUNKSIZE0; } int CHUNKSIZE0_TOT = (int)ceil((double)(*nextcf - *nintci + 1 ) / (double) num_procs); int REMAINDER_TOT = ( *nextcf - *nintci + 1 ) - ( num_procs - 1 ) * CHUNKSIZE0_TOT; if (my_rank == num_procs -1) { CHUNKSIZE_TOT = REMAINDER_TOT; } else { CHUNKSIZE_TOT = CHUNKSIZE0_TOT; } printf("TEST1\n"); MPI_Barrier( MPI_COMM_WORLD); // initialize global data structures double *gbs, *gbe, *gbn, *gbw, *gbl, *gbh, *gbp, *gsu; int gpoints_count; int** gpoints; int* gelems; double *gvar, *gcgup, *gcnorm; // printf("Process %i initializated function/n", my_rank); // read-in the input file int f_status = read_binary_geo_single( file_in, nintci, nintcf, nextci, nextcf, lcc, &gbs, &gbe, &gbn, &gbw, &gbl, &gbh, &gbp, &gsu, &gpoints_count, &gpoints, &gelems ); printf( "rank %d: binary read in successful code %d \n", my_rank, f_status ); MPI_Barrier( MPI_COMM_WORLD ); if ( f_status != 0 ) return f_status; gvar = (double*) calloc( (*nextcf + 1), sizeof(double) ); gcgup = (double*) calloc( (*nextcf + 1), sizeof(double) ); gcnorm = (double*) calloc( (*nintcf + 1), sizeof(double) ); // initialise the arrays for ( i = 0; i <= 10; i++ ) { gcnorm[i] = 1.0; } for ( i = (*nintci); i <= (*nintcf); i++ ) { gvar[i] = 0.0; } for ( i = (*nintci); i <= (*nintcf); i++ ) { gcgup[i] = 1.0 / ((gbp)[i]); } for ( i = (*nextci); i <= (*nextcf); i++ ) { gvar[i] = 0.0; gcgup[i] = 0.0; gbs[i] = 0.0; gbe[i] = 0.0; gbn[i] = 0.0; gbw[i] = 0.0; gbh[i] = 0.0; gbl[i] = 0.0; } /************METIS************/ idx_t ne; idx_t nn; idx_t *eptr; idx_t *eind; idx_t ncommon; idx_t nparts; idx_t *idx_objval; idx_t *idx_epart; idx_t *idx_npart; ne = ( *nintcf ) - ( *nintci ) + 1; nn = gpoints_count; ncommon = 4; nparts = num_procs; printf("TEST_1\n"); MPI_Barrier( MPI_COMM_WORLD); eptr = (idx_t*) calloc( ( ne + 1 ), sizeof(idx_t) ); eind = (idx_t*) calloc( ( ( ne + 1 ) * 8 ), sizeof(idx_t) ); idx_objval = (idx_t*) calloc( 1, sizeof(idx_t) ); idx_epart = (idx_t*) calloc( ( ne ), sizeof(idx_t) ); idx_npart = (idx_t*) calloc( ( nn ), sizeof(idx_t) ); epart = (int**) calloc( ( ne ) , sizeof(int*) ); npart = (int**) calloc( ( nn ), sizeof(int*) ); for ( i = 0; i < ( ne + 1 ); i++ ) { eptr[i] = i * 8; } for ( i = 0; i < ( ( ne + 1 ) * 8 ); i++ ) { eind[i] = (gelems)[i]; } if ( strcmp( part_type, "dual" ) == 0 ) { METIS_PartMeshDual( &ne, &nn, eptr, eind, NULL, NULL, &ncommon, &nparts, NULL, NULL, idx_objval, idx_epart, idx_npart ); } else { METIS_PartMeshNodal( &ne, &nn, eptr, eind, NULL, NULL, &nparts, NULL, NULL, idx_objval, idx_epart, idx_npart ); } printf("idx_epart[1000]=%d \n", 
idx_epart[ne-1]); MPI_Barrier( MPI_COMM_WORLD); for (i = 0; i < ne; i++) { epart[i]=(int)idx_epart[i]; } for (i = 0; i < nn; i++) { npart[i]=(int)idx_npart[i]; } *objval=(int)*idx_objval; // local_global_index if ( ( *local_global_index = (int*) calloc( ne , sizeof(int) ) ) == NULL ) { fprintf(stderr, "malloc failed to allocate local_global_index\n"); return -1; } if ( ( *global_local_index = (int*) calloc(ne, sizeof(int) ) ) == NULL ) { fprintf(stderr, "malloc failed to allocate local_global_index\n"); return -1; } j = 0; for (i = 0; i < ne; i++) { if( epart[i] == my_rank ) { (*local_global_index)[j]= i; (*global_local_index)[i]= j; j++; } } printf("local_global_index[100]=%d \n", local_global_index[100]); printf("global_local_index[100]=%d \n", global_local_index[100]); printf("epart[ne-1]=%d \n", epart[ne-1]); MPI_Barrier( MPI_COMM_WORLD); // allocate other arrays if ( (*cgup = (double *) malloc(CHUNKSIZE_TOT * sizeof(double))) == NULL ) { fprintf(stderr, "malloc(SU) failed\n"); return -1; } if ( (*bs = (double *) malloc(CHUNKSIZE_TOT * sizeof(double))) == NULL ) { fprintf(stderr, "malloc(BS) failed\n"); return -1; } if ( (*be = (double *) malloc(CHUNKSIZE_TOT * sizeof(double))) == NULL ) { fprintf(stderr, "malloc(BE) failed\n"); return -1; } if ( (*bn = (double *) malloc(CHUNKSIZE_TOT * sizeof(double))) == NULL ) { fprintf(stderr, "malloc(BN) failed\n"); return -1; } if ( (*bw = (double *) malloc(CHUNKSIZE_TOT * sizeof(double))) == NULL ) { fprintf(stderr, "malloc(BW) failed\n"); return -1; } if ( (*bl = (double *) malloc(CHUNKSIZE_TOT * sizeof(double))) == NULL ) { fprintf(stderr, "malloc(BL) failed\n"); return -1; } if ( (*bh = (double *) malloc(CHUNKSIZE_TOT * sizeof(double))) == NULL ) { fprintf(stderr, "malloc(BH) failed\n"); return -1; } if ( (*bp = (double *) malloc(CHUNKSIZE_TOT * sizeof(double))) == NULL ) { fprintf(stderr, "malloc(BP) failed\n"); return -1; } if ( (*su = (double *) malloc(CHUNKSIZE_TOT * sizeof(double))) == NULL ) { fprintf(stderr, "malloc(SU) failed\n"); return -1; } printf("TEST_2\n"); MPI_Barrier( MPI_COMM_WORLD); j = 0; for ( i = 0; i < ne; i++ ) { if(epart[i] == my_rank) { (*cgup)[j] = gcgup[i]; (*bs)[j] = gbs[i]; (*be)[j] = gbe[i]; (*bn)[j] = gbn[i]; (*bw)[j] = gbw[i]; (*bl)[j] = gbl[i]; (*bh)[j] = gbh[i]; (*bp)[j] = gbp[i]; (*su)[j] = gsu[i]; j++; } } free(gbs); free(gbe); free(gbn); free(gbw); free(gbl); free(gbh); free(gbp); free(gsu); printf("TEST_3\n"); MPI_Barrier( MPI_COMM_WORLD); return 0; }
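
/*
 * In the copy-out above, epart and npart are the int** output parameters of
 * initialization_metis, so writing "epart[i] = (int) idx_epart[i]" stores
 * integers into pointer slots rather than filling a flat buffer behind *epart
 * (and the later printf calls print pointers with %d). This is a hedged sketch
 * of the presumably intended allocation and copy-out, under that assumption;
 * the helper name is illustrative.
 */
#include <stdlib.h>
#include <metis.h>

static int copy_out_partition(int **epart, int **npart, int *objval,
                              const idx_t *idx_epart, const idx_t *idx_npart,
                              idx_t idx_objval, idx_t ne, idx_t nn)
{
    idx_t i;

    *epart = (int *) calloc(ne, sizeof(int));
    *npart = (int *) calloc(nn, sizeof(int));
    if (*epart == NULL || *npart == NULL)
        return -1;

    for (i = 0; i < ne; i++)
        (*epart)[i] = (int) idx_epart[i];
    for (i = 0; i < nn; i++)
        (*npart)[i] = (int) idx_npart[i];
    *objval = (int) idx_objval;

    return 0;
}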
int initialization(char* file_in, char* part_type, int* nintci, int* nintcf, int* nextci, int* nextcf, int*** lcc, double** bs, double** be, double** bn, double** bw, double** bl, double** bh, double** bp, double** su, int* points_count, int*** points, int** elems, double** var, double** cgup, double** oc, double** cnorm, int** local_global_index, int** global_local_index, int* neighbors_count, int** send_count, int*** send_list, int** recv_count, int*** recv_list, int** epart, int** npart, int** objval, int* num_elems_local) { /********** START INITIALIZATION **********/ int i = 0; int j = 0; int num_elems_pro;//number of elements in each processor int my_rank, num_procs; /// Boundary coefficients for each volume cell (South, East, North, West, High, Low) double *bs_a, *be_a, *bn_a, *bw_a, *bl_a, *bh_a; double *bp_a; /// Pole coefficient double *su_a; /// Source values int** lcc_a; /// link cell-to-cell array - stores neighboring information int** lcc_b; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /// Get current process id MPI_Comm_size(MPI_COMM_WORLD, &num_procs); /// get number of processe // read-in the input file by one processor if ( my_rank == 0 ) { int f_status = read_binary_geo(file_in, &*nintci, &*nintcf, &*nextci, &*nextcf, &lcc_a, &bs_a, &be_a, &bn_a, &bw_a, &bl_a, &bh_a, &bp_a, &su_a, &*points_count, &*points, &*elems); if ( f_status != 0 ) return f_status; } //Send the common information to other processors MPI_Bcast (nintci,1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast (nintcf,1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast (nextci,1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast (nextcf,1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast (points_count,1, MPI_INT, 0, MPI_COMM_WORLD); //local arrays and share parameters int num_elems = *nintcf-*nintci+1; int points_num = *points_count; int npro = num_elems/num_procs; int exter = *nextcf - *nextci + 1; int remain = 0; int *k = (int*) calloc(sizeof(int), num_procs); int *k_sum = (int*) calloc(sizeof(int), num_procs); if (my_rank == (num_procs-1) ) { remain = num_elems % num_procs; } int local_array_size = npro + remain + exter; *local_global_index = (int*) calloc(sizeof(int), npro+remain+exter); *bs = (double*) calloc(sizeof(double), (local_array_size)); *bn = (double*) calloc(sizeof(double), (local_array_size)); *bw = (double*) calloc(sizeof(double), (local_array_size)); *be = (double*) calloc(sizeof(double), (local_array_size)); *bl = (double*) calloc(sizeof(double), (local_array_size)); *bh = (double*) calloc(sizeof(double), (local_array_size)); *bp = (double*) calloc(sizeof(double), (local_array_size)); *su = (double*) calloc(sizeof(double), (local_array_size)); *var = (double*) calloc(sizeof(double), (local_array_size)); *cgup = (double*) calloc(sizeof(double), (local_array_size)); *oc = (double*) calloc(sizeof(double), (npro+remain)); *cnorm = (double*) calloc(sizeof(double), (npro+remain)); *lcc = (int**) calloc(sizeof(int*),(local_array_size)); for ( i = 0; i < local_array_size; i++ ) { (*lcc)[i] = (int *) calloc(sizeof(int),(6)); } int *data = (int *)calloc(sizeof(int),(num_elems*6)); lcc_b = (int **)calloc(sizeof(int*),(num_elems)); for ( i=0; i<num_elems; i++){ lcc_b[i] = &(data[6*i]); } if ( my_rank == 0 ) { for ( i = 0; i< num_elems; i++ ) { for ( j = 0; j < 6; j++ ) { lcc_b[i][j]=lcc_a[i][j]; } } } MPI_Bcast (&(lcc_b[0][0]),num_elems*6, MPI_INT, 0, MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); //choose part type if (strcmp(part_type,"classical") == 0) { //int *k_c = (int*) calloc(sizeof(int), num_procs); k[my_rank] = npro + remain; 
(num_elems_pro) = npro + remain; int p = 0; for ( p = 0; p < num_procs; p++ ) { MPI_Bcast(&k[p],1,MPI_INT,p,MPI_COMM_WORLD); } //int *k_c_sum = (int*) calloc(sizeof(int), num_procs); if ( my_rank == 0 ) { for (i = 1; i < num_procs; i++ ) { k_sum[i]=k_sum[i-1]+k[i-1]; } } //ditribute all B* array MPI_Scatterv(bs_a, k, k_sum, MPI_DOUBLE, *bs, k[my_rank],MPI_DOUBLE,0, MPI_COMM_WORLD); MPI_Scatterv(bn_a, k, k_sum, MPI_DOUBLE, *bn, k[my_rank],MPI_DOUBLE,0, MPI_COMM_WORLD); MPI_Scatterv(bw_a, k, k_sum, MPI_DOUBLE, *bw, k[my_rank],MPI_DOUBLE,0, MPI_COMM_WORLD); MPI_Scatterv(be_a, k, k_sum, MPI_DOUBLE, *be, k[my_rank],MPI_DOUBLE,0, MPI_COMM_WORLD); MPI_Scatterv(bl_a, k, k_sum, MPI_DOUBLE, *bl, k[my_rank],MPI_DOUBLE,0, MPI_COMM_WORLD); MPI_Scatterv(bh_a, k, k_sum, MPI_DOUBLE, *bh, k[my_rank],MPI_DOUBLE,0, MPI_COMM_WORLD); MPI_Scatterv(bp_a, k, k_sum, MPI_DOUBLE, *bp, k[my_rank],MPI_DOUBLE,0, MPI_COMM_WORLD); MPI_Scatterv(su_a, k, k_sum, MPI_DOUBLE, *su, k[my_rank],MPI_DOUBLE,0, MPI_COMM_WORLD); /* create a datatype to describe the subarrays of the global array */ /*int sizes[2] = {num_elems, 6}; int subsizes[2] = {npro, 6}; int starts[2] = {0,0}; MPI_Datatype type, subarrtype; MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_INT, &type); MPI_Type_create_resized(type, 0, npro*sizeof(int), &subarrtype); MPI_Type_commit(&subarrtype);*/ //*lcc = (int**) calloc(sizeof(int*),(npro)); //for ( i = 0; i < npro; i++ ) { // (*lcc)[i] = &(data[6*i]); //(int *) calloc(sizeof(int),(6)); //} /*int *globalptr; if (my_rank == 0) globalptr = &(lcc_a[0][0]); // scatter the array to all processors int sendcounts[num_procs]; int displs[num_procs]; if (my_rank == 0) { for ( i = 0; i < num_procs; i++) sendcounts[i] = 1; // int disp = 0; // for (int i=0; i<; i++) { // for (int j=0; j<; j++) { // displs[i*procgridsize+j] = disp; // disp += 1; // } // disp += ((gridsize/procgridsize)-1)*procgridsize; // } } MPI_Scatterv(&(lcc_a[0][0]),sendcounts, k_c_sum, subarrtype, &((*lcc)[0][0]), npro*6, MPI_INT, 0, MPI_COMM_WORLD);*/ //initialization of computational array for ( i = 0; i < num_elems_pro; i++ ) { (*local_global_index)[i] = my_rank * npro + i; for (j = 0;j < 6;j++){ (*lcc)[i][j]=lcc_b[my_rank*npro+i][j]; } } for ( i = 0; i <= 10; i++ ) { (*oc)[i] = 0.0; (*cnorm)[i] = 1.0; } for ( i = 0; i < num_elems_pro; i++ ) { (*cgup)[i] = 0.0; (*var)[i] = 0.0; } for ( i = num_elems_pro; i < local_array_size; i++ ) { (*var)[i] = 0.0; (*cgup)[i] = 0.0; (*bs)[i] = 0.0; (*be)[i] = 0.0; (*bn)[i] = 0.0; (*bw)[i] = 0.0; (*bl)[i] = 0.0; (*bh)[i] = 0.0; } for ( i = 0; i < num_elems_pro; i++ ) { (*cgup)[i] = 1.0 / ((*bp)[i]); } //part type is not classics but metis }else{ *epart = (int*) calloc(sizeof(int), num_elems); *npart = (int*) calloc(sizeof(int), num_elems*8); if ( my_rank == 0 ) { //parametes and array for metis partition libary idx_t ne = (idx_t) num_elems; idx_t nn = (idx_t) points_num; idx_t ncommon = 4; idx_t nparts = num_procs; int node_num = ne*8; idx_t *eptr = (idx_t*) calloc(sizeof(idx_t), num_elems + 1); idx_t *eind = (idx_t*) calloc(sizeof(idx_t), node_num); idx_t objval_METIS; idx_t *epart_METIS = (idx_t*) calloc(sizeof(idx_t), num_elems); idx_t *npart_METIS = (idx_t*) calloc(sizeof(idx_t), node_num); int metis_final; for ( i = (*nintci); i <= (*nintcf + 1) ; i++ ) { eptr[i] = (idx_t) i*8; } for( i = 0; i < node_num; i++ ) { eind[i] = (idx_t) (*elems)[i]; } if ( strcmp(part_type,"dual") == 0 ) { metis_final = METIS_PartMeshDual(&ne,&nn,eptr, eind, NULL, NULL, &ncommon, &nparts, NULL,NULL, &objval_METIS, 
epart_METIS, npart_METIS); } else if ( strcmp(part_type,"noda") == 0 ) { metis_final = METIS_PartMeshNodal(&ne,&nn,eptr, eind, NULL, NULL, &nparts, NULL,NULL, &objval_METIS, epart_METIS, npart_METIS); } if ( metis_final != METIS_OK ) { printf("Metis part fails\n"); } (*objval)=(int*) calloc(sizeof(int), 1); (*objval)[0]=(int) objval_METIS; for ( i = 0; i < num_elems; i++ ) { (*epart)[i] = (int) epart_METIS[i]; } for ( i = 0; i < node_num; i++ ) { (*npart)[i] = (int) npart_METIS[i]; } }//single processor //Full METIS arrary should be avaible for every processor MPI_Bcast(*epart,num_elems,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(*npart,num_elems*8,MPI_INT,0,MPI_COMM_WORLD); //ditribute data according to METIS Partition MPI_Barrier(MPI_COMM_WORLD); int p = 0; //int *k = (int*) calloc(sizeof(int), num_procs); //store local to global mapping for ( p = 0; p < num_procs; p++ ) { if (my_rank == p ) { for (j = 0; j < num_elems; j++ ) { if ( (*epart)[j] == my_rank ) { (*local_global_index)[k[my_rank]] = j ; for (i=0;i<6;i++){ (*lcc)[k[my_rank]][i]=lcc_b[j][i]; } k[my_rank] = k[my_rank] + 1; } } } MPI_Bcast(&k[p],1,MPI_INT,p,MPI_COMM_WORLD);/// send k[p] to other processors }///finish storing local to global mapping (num_elems_pro) = k[my_rank]; //int *k_sum = (int*) calloc(sizeof(int), num_procs); int *local_global_index_sum = (int*) calloc(sizeof(int), num_elems); if ( my_rank == 0 ) { for (i = 1; i < num_procs; i++ ) { k_sum[i]=k_sum[i-1]+k[i-1]; } } MPI_Gatherv( *local_global_index, k[my_rank], MPI_INT, local_global_index_sum, k, k_sum, MPI_INT, 0, MPI_COMM_WORLD); //copy B* array into new array accoring to order from metis partition double *bs_b = (double*) calloc(sizeof(double), (num_elems)); double *bn_b = (double*) calloc(sizeof(double), (num_elems)); double *bw_b = (double*) calloc(sizeof(double), (num_elems)); double *be_b = (double*) calloc(sizeof(double), (num_elems)); double *bl_b = (double*) calloc(sizeof(double), (num_elems)); double *bh_b = (double*) calloc(sizeof(double), (num_elems)); double *bp_b = (double*) calloc(sizeof(double), (num_elems)); double *su_b = (double*) calloc(sizeof(double), (num_elems)); if (my_rank==0){ for (i= 0; i<num_elems; i++){ j=local_global_index_sum[i]; bs_b[i]=bs_a[j]; bn_b[i]=bn_a[j]; bw_b[i]=bw_a[j]; be_b[i]=be_a[j]; bl_b[i]=bl_a[j]; bh_b[i]=bh_a[j]; bp_b[i]=bp_a[j]; su_b[i]=su_a[j]; } } MPI_Scatterv( bs_b, k, k_sum , MPI_DOUBLE, *bs, k[my_rank],MPI_DOUBLE,0, MPI_COMM_WORLD); MPI_Scatterv( bn_b, k, k_sum , MPI_DOUBLE, *bn, k[my_rank],MPI_DOUBLE,0, MPI_COMM_WORLD); MPI_Scatterv( bw_b, k, k_sum , MPI_DOUBLE, *bw, k[my_rank],MPI_DOUBLE,0, MPI_COMM_WORLD); MPI_Scatterv( be_b, k, k_sum , MPI_DOUBLE, *be, k[my_rank],MPI_DOUBLE,0, MPI_COMM_WORLD); MPI_Scatterv( bl_b, k, k_sum , MPI_DOUBLE, *bl, k[my_rank],MPI_DOUBLE,0, MPI_COMM_WORLD); MPI_Scatterv( bh_b, k, k_sum , MPI_DOUBLE, *bh, k[my_rank],MPI_DOUBLE,0, MPI_COMM_WORLD); MPI_Scatterv( bp_b, k, k_sum , MPI_DOUBLE, *bp, k[my_rank],MPI_DOUBLE,0, MPI_COMM_WORLD); MPI_Scatterv( su_b, k, k_sum , MPI_DOUBLE, *su, k[my_rank],MPI_DOUBLE,0, MPI_COMM_WORLD); //initialization computational array for ( i = 0; i <= 10; i++ ) { (*oc)[i] = 0.0; (*cnorm)[i] = 1.0; } for ( i = 0; i < num_elems_pro; i++ ) { (*cgup)[i] = 0.0; (*var)[i] = 0.0; } for ( i = num_elems_pro; i < local_array_size; i++ ) { (*var)[i] = 0.0; (*cgup)[i] = 0.0; (*bs)[i] = 0.0; (*be)[i] = 0.0; (*bn)[i] = 0.0; (*bw)[i] = 0.0; (*bl)[i] = 0.0; (*bh)[i] = 0.0; } for ( i = 0; i < num_elems_pro; i++ ) { (*cgup)[i] = 1.0 / ((*bp)[i]); } MPI_Barrier(MPI_COMM_WORLD); 
free(bp_b); free(bh_b); free(bl_b); free(bw_b); free(bn_b); free(be_b); free(bs_b); free(su_b); free(local_global_index_sum); }//finish choose part type section and all local array are stored MPI_Barrier(MPI_COMM_WORLD); //************Comunication List*********// *num_elems_local = num_elems_pro; *neighbors_count = num_procs-1; *send_count = (int*) calloc(sizeof(int), (num_procs)); *recv_count = (int*) calloc(sizeof(int), (num_procs)); *send_list = (int **) calloc(*neighbors_count+1, sizeof(int*)); for ( i = 0; i < *neighbors_count+1; i++ ) { (*send_list)[i] = (int *) calloc(6*num_elems_pro, sizeof(int)); } *recv_list = (int **) calloc(*neighbors_count+1, sizeof(int*)); for ( i = 0; i < *neighbors_count+1; i++ ) { (*recv_list)[i] = (int *) calloc(6*num_elems_pro, sizeof(int)); } //MPI_Barrier(MPI_COMM_WORLD); int num_elems_global=0; int* rank = (int*) calloc(sizeof(int), (num_procs)); int m = 0; for (i = 0; i < num_elems_pro; i++) { for (j = 0; j < 6; j++ ) { num_elems_global=(*lcc)[i][j]; // choose only ghost cell if (num_elems_global < num_elems){ // choose part type if (strcmp(part_type,"classical") == 0) { if (num_elems_global >= npro * num_procs){ rank[my_rank]= num_elems_global/npro-1; }else{ rank[my_rank]= num_elems_global/npro; } } else { rank[my_rank]=(*epart)[num_elems_global]; }///end choosing part type if (rank[my_rank] != my_rank ) { (*send_list)[rank[my_rank]][(*send_count)[rank[my_rank]]] = (*local_global_index)[i]; (*send_count)[rank[my_rank]]=(*send_count)[rank[my_rank]]+1; (*recv_list)[rank[my_rank]][(*recv_count)[rank[my_rank]]] = num_elems_global; (*recv_count)[rank[my_rank]]=(*recv_count)[rank[my_rank]]+1; } }///choose ghost cell }///end j for loop }///end i for loop free(lcc_b); if (my_rank == 0) { printf("Initializition finished successfully!\n"); } return 0; }
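
/*
 * Both initialization variants finish by exchanging communication lists: each
 * rank tells every other rank how many ghost cells it will send, then ships
 * the corresponding global indices. This is a hedged, stripped-down sketch of
 * that handshake using the same MPI_Sendrecv pattern as above; names are
 * illustrative and error handling is omitted.
 */
#include <stdlib.h>
#include <mpi.h>

static void exchange_halo_lists(int my_rank, int num_procs,
                                const int *send_count, int **send_list,
                                int *recv_count, int **recv_list)
{
    MPI_Status status;
    int p;

    /* 1. symmetric exchange of list lengths */
    for (p = 0; p < num_procs; p++) {
        if (p == my_rank) { recv_count[p] = 0; continue; }
        MPI_Sendrecv((void *) &send_count[p], 1, MPI_INT, p, 0,
                     &recv_count[p], 1, MPI_INT, p, 0,
                     MPI_COMM_WORLD, &status);
    }

    /* 2. exchange the index lists themselves, matching tags on both sides */
    for (p = 0; p < num_procs; p++) {
        if (p == my_rank) continue;
        recv_list[p] = (int *) calloc(recv_count[p] ? recv_count[p] : 1, sizeof(int));
        MPI_Sendrecv((void *) send_list[p], send_count[p], MPI_INT, p, 1,
                     recv_list[p], recv_count[p], MPI_INT, p, 1,
                     MPI_COMM_WORLD, &status);
    }
}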
int partition(int part_key, int read_key, int myrank, int nprocs, int nintci_g, int nintcf_g,
              int nextci_g, int nextcf_g, int *nintci, int *nintcf, int *nextci, int *nextcf,
              int **lcc_g, int points_count_g, int **points_g, int *elems_g,
              int *int_cells_per_proc, int *extcell_per_proc, int **local_global_index_g,
              int **local_global_index, int **partitioning_map) {
    int i = 0;
    idx_t nelems, nnodes, ncommon = 4, nparts, objval;
    idx_t *elem_ptr, *elem_idx, *elem_part, *node_part;

    nelems = nintcf_g - nintci_g + 1;
    *partitioning_map = (int *) calloc(sizeof(int), (nintcf_g - nintci_g + 1));
    check_allocation(myrank, partitioning_map, "Partitioning map allocation failed");

    if (((read_key == POSL_INIT_ONE_READ) && (myrank == 0)) || (read_key == POSL_INIT_ALL_READ)) {
        *nintci = 0;
        *nintcf = 0;

        if (part_key == POSL_PARTITIONING_CLASSIC) {
            // the last processor always gets a different number of cells
            int elem_per_proc = (nelems + (nprocs - 1)) / nprocs;
            *nextci = (myrank == nprocs - 1) ? nelems - (nprocs - 1) * elem_per_proc : elem_per_proc;
            *nintcf = *nextci - 1;

            // build global cell allocation
            if (read_key == POSL_INIT_ONE_READ) {
                for (i = 0; i < (nprocs - 1); ++i) {
                    int_cells_per_proc[i] = elem_per_proc;
                }
                int_cells_per_proc[nprocs - 1] = nelems - (nprocs - 1) * elem_per_proc;
            }
            for (i = 0; i < nelems; ++i) {
                (*partitioning_map)[i] = i / elem_per_proc;
            }
        } else {
            // initialize variables for metis
            nnodes = points_count_g;
            nparts = nprocs;
            elem_ptr = (idx_t *) calloc(nelems + 1, sizeof(idx_t));
            elem_idx = (idx_t *) calloc(nelems * 8, sizeof(idx_t));
            elem_part = (idx_t *) calloc(nelems, sizeof(idx_t));
            node_part = (idx_t *) calloc(nnodes, sizeof(idx_t));

            // assign arrays that store the metis graph mesh
            for (i = 0; i < (nelems + 1); i++) {
                elem_ptr[i] = 8 * i;
            }
            for (i = 0; i < (nelems * 8); i++) {
                elem_idx[i] = elems_g[i];
            }

            // perform metis partitioning
            if (part_key == POSL_PARTITIONING_DUAL) {
                METIS_PartMeshDual(&nelems, &nnodes, elem_ptr, elem_idx, NULL, NULL, &ncommon,
                                   &nparts, NULL, NULL, &objval, elem_part, node_part);
            } else {
                METIS_PartMeshNodal(&nelems, &nnodes, elem_ptr, elem_idx, NULL, NULL,
                                    &nparts, NULL, NULL, &objval, elem_part, node_part);
            }
            for (i = 0; i < nelems; i++) {
                (*partitioning_map)[i] = (int) elem_part[i];
            }

            // initialize global cell counters
            if (read_key == POSL_INIT_ONE_READ) {
                for (i = 0; i < nprocs; ++i) {
                    int_cells_per_proc[i] = 0;
                }
            }
            // TODO: consider performance gains when the if statement is outside of the loop
            // compute position of the last internal cell
            for (i = 0; i < nelems; i++) {
                if (read_key == POSL_INIT_ONE_READ) {
                    int_cells_per_proc[(*partitioning_map)[i]] += 1;
                } else {
                    if (myrank == (*partitioning_map)[i]) {
                        (*nintcf) += 1;
                    }
                }
            }
            // assign local internal cell ending idx
            if (read_key == POSL_INIT_ONE_READ) {
                *nintcf = int_cells_per_proc[myrank];
            }
            *nextci = (*nintcf)--;
        }
    }

    return POSL_OK;
}
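
/*
 * partition() above discards the status returned by METIS_PartMeshDual/Nodal.
 * This is a small hedged helper for mapping the METIS 5.x return codes
 * (METIS_OK, METIS_ERROR_INPUT, METIS_ERROR_MEMORY, METIS_ERROR, as defined in
 * metis.h) to diagnostics; the helper name is illustrative.
 */
#include <stdio.h>
#include <metis.h>

/* Return 0 if METIS succeeded; otherwise print a short diagnostic. */
static int check_metis_status(int status, const char *where)
{
    switch (status) {
        case METIS_OK:
            return 0;
        case METIS_ERROR_INPUT:
            fprintf(stderr, "%s: METIS reported an input error\n", where);
            break;
        case METIS_ERROR_MEMORY:
            fprintf(stderr, "%s: METIS could not allocate memory\n", where);
            break;
        default:
            fprintf(stderr, "%s: METIS returned an error (%d)\n", where, status);
            break;
    }
    return -1;
}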
/*************************************************************************
 * Let the game begin
 **************************************************************************/
int main(int argc, char *argv[])
{
  idxtype i, j, ne, nn, etype, mtype, numflag=0, nparts, edgecut;
  idxtype *elmnts, *epart, *npart, *metype, *weights;
  double IOTmr, DUALTmr;
  char etypestr[5][5] = {"TRI", "TET", "HEX", "QUAD", "LINE"};
  GraphType graph;

  if (argc != 3) {
    mprintf("Usage: %s <meshfile> <nparts>\n", argv[0]);
    exit(0);
  }

  nparts = atoi(argv[2]);
  if (nparts < 2) {
    mprintf("nparts must be greater than one.\n");
    exit(0);
  }

  gk_clearcputimer(IOTmr);
  gk_clearcputimer(DUALTmr);

  mtype = MeshType(argv[1]);
  ne = MixedElements(argv[1]);
  metype = idxmalloc(ne, "main: metype");
  weights = idxmalloc(ne, "main: weights");

  gk_startcputimer(IOTmr);
  if (mtype == 1)
    elmnts = ReadMesh(argv[1], &ne, &nn, &etype);
  else if (mtype == 3)
    elmnts = ReadMeshWgt(argv[1], &ne, &nn, &etype, weights);
  else if (mtype == 0)
    elmnts = ReadMixedMesh(argv[1], &ne, &nn, metype);
  else
    elmnts = ReadMixedMeshWgt(argv[1], &ne, &nn, metype, weights);
  gk_stopcputimer(IOTmr);

  epart = idxmalloc(ne, "main: epart");
  npart = idxmalloc(nn, "main: npart");

  mprintf("**********************************************************************\n");
  mprintf("%s", METISTITLE);
  mprintf("Mesh Information ----------------------------------------------------\n");
  if (mtype == 1)
    mprintf(" Name: %s, #Elements: %D, #Nodes: %D, Etype: %s\n\n", argv[1], ne, nn, etypestr[etype-1]);
  else
    mprintf(" Name: %s, #Elements: %D, #Nodes: %D, Etype: %s\n\n", argv[1], ne, nn, "Mixed");
  mprintf("Partitioning Nodal Graph... -----------------------------------------\n");

  gk_startcputimer(DUALTmr);
  if (mtype == 1 || mtype == 3)
    METIS_PartMeshNodal(&ne, &nn, elmnts, &etype, &numflag, &nparts, &edgecut, epart, npart);
  else
    METIS_PartMixedMeshNodal(&ne, &nn, elmnts, metype, &numflag, &nparts, &edgecut, epart, npart);
  gk_stopcputimer(DUALTmr);

  mprintf(" %D-way Edge-Cut: %7D, Balance: %5.2f\n", nparts, edgecut,
          ComputeElementBalance(ne, nparts, epart));

  gk_startcputimer(IOTmr);
  WriteMeshPartition(argv[1], nparts, ne, epart, nn, npart);
  gk_stopcputimer(IOTmr);

  mprintf("\nTiming Information --------------------------------------------------\n");
  mprintf(" I/O: \t\t %7.3f\n", gk_getcputimer(IOTmr));
  mprintf(" Partitioning: \t\t %7.3f\n", gk_getcputimer(DUALTmr));
  mprintf("**********************************************************************\n");

/*
  graph.nvtxs = ne;
  graph.xadj = idxmalloc(ne+1, "xadj");
  graph.vwgt = idxsmalloc(ne, 1, "vwgt");
  graph.adjncy = idxmalloc(10*ne, "adjncy");
  graph.adjwgt = idxsmalloc(10*ne, 1, "adjncy");

  METIS_MeshToDual(&ne, &nn, elmnts, &etype, &numflag, graph.xadj, graph.adjncy);

  ComputePartitionInfo(&graph, nparts, epart);

  gk_free((void **)&graph.xadj, &graph.adjncy, &graph.vwgt, &graph.adjwgt, LTERM);
*/

  gk_free((void **)&elmnts, &epart, &npart, &metype, &weights, LTERM);
}
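
/*
 * Both drivers persist the result via WriteMeshPartition. In stock METIS the
 * convention is one id per line, written to <meshfile>.epart.<nparts> and
 * <meshfile>.npart.<nparts>. This is a hedged sketch of a writer following
 * that convention; the real routine's formatting may differ, and idxtype is
 * assumed to come from the 4.x-style header used above.
 */
#include <stdio.h>
#include <metis.h>

/* Write one partition id per line, e.g. write_partition_file(mesh, "epart", nparts, ne, epart). */
static int write_partition_file(const char *meshfile, const char *suffix,
                                int nparts, int n, const idxtype *part)
{
    char fname[512];
    FILE *fp;
    int i;

    snprintf(fname, sizeof(fname), "%s.%s.%d", meshfile, suffix, nparts);
    if ((fp = fopen(fname, "w")) == NULL)
        return -1;
    for (i = 0; i < n; i++)
        fprintf(fp, "%d\n", (int) part[i]);
    fclose(fp);
    return 0;
}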