int FEM_master_parallel_part(int fem_mesh,int masterRank,FEM_Comm_t comm_context){
  const char *caller="FEM_Create_connmsa";
  FEMAPI(caller);
  FEM_chunk *c=FEM_chunk::get(caller);
  FEM_Mesh *m=c->lookup(fem_mesh,caller);
  m->setAbsoluteGlobalno();
  int nelem = m->nElems();
  int numChunks;
  MPI_Comm_size((MPI_Comm)comm_context,&numChunks);
  printf("master -> number of elements %d \n",nelem);
  DEBUG(m->print(0));

  /* Load the connectivity information into the eptr and eind data structures.
     They will be read by the other (slave) chunks and used to call ParMETIS. */
  MSA1DINT eptrMSA(nelem,numChunks);
  MSA1DINT eindMSA(nelem*10,numChunks);
  /* After the MSA arrays have been created and loaded with connectivity data,
     tell the slaves about them. */
  struct conndata data;
  data.nelem = nelem;
  data.nnode = m->node.size();
  data.arr1 = eptrMSA;
  data.arr2 = eindMSA;
  MPI_Bcast_pup(data,masterRank,(MPI_Comm)comm_context);

  eptrMSA.enroll(numChunks);
  eindMSA.enroll(numChunks);
  MSA1DINT::Write wPtr = eptrMSA.getInitialWrite();
  MSA1DINT::Write wInd = eindMSA.getInitialWrite();
  int indcount=0,ptrcount=0;
  for(int t=0;t<m->elem.size();t++){
    if(m->elem.has(t)){
      FEM_Elem &k=m->elem[t];
      for(int e=0;e<k.size();e++){
        wPtr.set(ptrcount)=indcount;
        ptrcount++;
        for(int n=0;n<k.getNodesPer();n++){
          wInd.set(indcount)=k.getConn(e,n);
          indcount++;
        }
      }
    }
  }
  wPtr.set(ptrcount) = indcount;
  printf("master -> ptrcount %d indcount %d sizeof(MSA1DINT) %d sizeof(MSA1DINTLIST) %d memory %d\n",ptrcount,indcount,sizeof(MSA1DINT),sizeof(MSA1DINTLIST),CmiMemoryUsage());

  /* Break up the mesh so that each chunk gets the same number of elements and
     the nodes corresponding to those elements. This is not the partition; it
     just distributes the data so that, when the partition is done with
     ParMETIS, all the requests for data do not go to chunk 0. Instead, after
     partitioning, each chunk can send the element and node data to the chunks
     that need it. */
  FEM_Mesh *mesh_array=FEM_break_mesh(m,ptrcount,numChunks);
  /* Send the broken-up meshes to the different chunks. */
  sendBrokenMeshes(mesh_array,comm_context);
  delete [] mesh_array;
  FEM_Mesh mypiece;
  MPI_Recv_pup(mypiece,masterRank,MESH_CHUNK_TAG,(MPI_Comm)comm_context);

  /* Call ParMETIS. */
  double parStartTime = CkWallTimer();
  MSA1DINT::Read rPtr = wPtr.syncToRead();
  MSA1DINT::Read rInd = wInd.syncToRead();
  printf("starting FEM_call_parmetis \n");
  struct partconndata *partdata = FEM_call_parmetis(data.nelem, rPtr, rInd, comm_context);
  printf("done with parmetis %d FEM_Mesh %d in %.6lf \n",CmiMemoryUsage(),sizeof(FEM_Mesh),CkWallTimer()-parStartTime);
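  /* Illustrative note: the eptr/eind pair consumed by FEM_call_parmetis above
     is the usual CSR-style element-connectivity layout used by ParMETIS' mesh
     partitioner, where eind[eptr[e]] .. eind[eptr[e+1]-1] are the node numbers
     of element e. For a hypothetical two-triangle mesh on nodes 0..3 it would
     look like
         eptr = { 0, 3, 6 }
         eind = { 0, 1, 2,   1, 3, 2 }
     (example values only; the real arrays are filled from the mesh's FEM_Elem
     connectivity in the loop above). */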
  double dataArrangeStartTime = CkWallTimer();

  /* Set up an MSA to store the partitions to which a node belongs.
     A node can belong to multiple partitions. */
  int totalNodes = m->node.size();
  MSA1DINTLIST nodepart(totalNodes,numChunks);
  MPI_Bcast_pup(nodepart,masterRank,(MPI_Comm)comm_context);
  nodepart.enroll(numChunks);
  MSA1DINTLIST::Accum nodepartAcc = nodepart.getInitialAccum();

  FEM_write_nodepart(nodepartAcc,partdata,(MPI_Comm)comm_context);
  printf("Creating mapping of node to partition took %.6lf\n",CkWallTimer()-dataArrangeStartTime);
  dataArrangeStartTime = CkWallTimer();
  MSA1DINTLIST::Read nodepartRead = nodepartAcc.syncToRead();

  /* Set up an MSA to store the nodes that belong to each partition. */
  MSA1DNODELIST part2node(numChunks,numChunks);
  MPI_Bcast_pup(part2node,masterRank,(MPI_Comm)comm_context);
  part2node.enroll(numChunks);
  MSA1DNODELIST::Accum part2nodeAcc = part2node.getInitialAccum();

  FEM_write_part2node(nodepartRead, part2nodeAcc, partdata, (MPI_Comm)comm_context);

  /* Get the list of elements and nodes that belong to this partition. */
  MSA1DNODELIST::Read rPart2node = part2nodeAcc.syncToRead();
  NodeList lnodes = rPart2node.get(masterRank);
  lnodes.uniquify();
  // IntList lelems = part2elem.get(masterRank);

  printf("Creating mapping of partition to node took %.6lf\n",CkWallTimer()-dataArrangeStartTime);
  printf("Time spent doing +=ElemList %.6lf \n",elemlistaccTime);
  dataArrangeStartTime = CkWallTimer();

  /* Build an MSA of FEM_Mesh, with each index containing the mesh for that chunk. */
  MSA1DFEMMESH part2mesh(numChunks,numChunks);
  MPI_Bcast_pup(part2mesh,masterRank,(MPI_Comm)comm_context);
  part2mesh.enroll(numChunks);
  MSA1DFEMMESH::Accum aPart2mesh = part2mesh.getInitialAccum();

  FEM_write_part2mesh(aPart2mesh,partdata,&data,nodepartRead,numChunks,masterRank,&mypiece);

  /* Get your mesh, consisting of elements and nodes, out of the mesh MSA. */
  MSA1DFEMMESH::Read rPart2mesh = aPart2mesh.syncToRead();
  MeshElem me = rPart2mesh.get(masterRank);
  //printf("[%d] Number of elements in my partitioned mesh %d number of nodes %d \n",masterRank,me.m->nElems(),me.m->node.size());

  DEBUG(printf("[%d] Memory usage on vp 0 close to max %d \n",CkMyPe(),CmiMemoryUsage()));
  // Free up the eptr and eind MSA arrays stored in data.
  delete &rPtr;
  delete &rInd;
  data.arr1.FreeMem();
  data.arr2.FreeMem();
  nodepart.FreeMem();
  DEBUG(printf("[%d] Memory usage on vp 0 after FreeMem %d \n",CkMyPe(),CmiMemoryUsage()));

  addIDXLists(me.m,lnodes,masterRank);

  part2node.FreeMem();
  DEBUG(printf("[%d] Memory usage on vp 0 after addIDXL %d \n",CkMyPe(),CmiMemoryUsage()));

  /* Broadcast the user data to all the meshes. */
  DEBUG(printf("[%d] Length of udata vector in master %d \n",masterRank,m->udata.size()));
  MPI_Bcast_pup(m->udata,masterRank,(MPI_Comm)comm_context);
  me.m->udata = m->udata;
  delete partdata;
  printf("[%d] Data Arrangement took %.6lf \n",masterRank,CkWallTimer()-dataArrangeStartTime);
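  /* Illustrative note on the two mappings built above (hypothetical numbers):
     if ParMETIS assigns the elements sharing node 7 to partitions 2 and 5,
     FEM_write_nodepart accumulates both ids, so nodepart[7] = {2,5};
     FEM_write_part2node then inverts the mapping, appending node 7 to
     part2node[2] and part2node[5]. Nodes listed under more than one partition
     are presumably the ones that become shared nodes in the IDXL lists built
     by addIDXLists above. */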
  /* Collect the ghost data and send it to all the chunks. */
  struct ghostdata *gdata = gatherGhosts();
  DEBUG(printf("[%d] number of ghost layers %d \n",masterRank,gdata->numLayers));
  MPI_Bcast_pup(*gdata,masterRank,(MPI_Comm)comm_context);

  /* Make ghosts for this mesh. */
  printf("[%d] Starting to generate number of ghost layers %d \n",masterRank,gdata->numLayers);
  double _startTime = CkWallTimer();
  makeGhosts(me.m,(MPI_Comm)comm_context,masterRank,gdata->numLayers,gdata->layers);
  delete gdata;
  printf("[%d] Ghost generation took %.6lf \n",masterRank,CkWallTimer()-_startTime);

  me.m->becomeGetting();
  FEM_chunk *chunk = FEM_chunk::get("FEM_Mesh_Parallel_broadcast");
  int tempMeshNo = chunk->meshes.put(me.m);
  int new_mesh = FEM_Mesh_copy(tempMeshNo);

  FEM_Mesh *nmesh = c->lookup(new_mesh,"master_parallel_broadcast");
  DEBUG(printf("[%d] Length of udata vector in master new_mesh %d \n",masterRank,nmesh->udata.size()));

  part2mesh.FreeMem();
  printf("[%d] Max Memory usage on vp 0 at end of parallel partition %d \n",CkMyPe(),CmiMaxMemoryUsage());

  return new_mesh;
}
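/* A minimal sketch (not part of the original code) of the MSA phase discipline
   that both the master path above and the slave path below follow: every chunk
   enrolls exactly once per array, and all chunks flip together from the write
   (or accumulate) phase to the read phase via syncToRead(). A chunk that skips
   enroll() or a sync stalls the whole collective. The helper name and its
   arguments are made up for illustration. */
static void MSA_phase_example(MSA1DINT &arr, int numChunks, int myIdx, int myVal) {
  arr.enroll(numChunks);                      // collective: every chunk enrolls once
  MSA1DINT::Write w = arr.getInitialWrite();  // write phase begins
  w.set(myIdx) = myVal;                       // each chunk writes its own slot
  MSA1DINT::Read r = w.syncToRead();          // collective flip to the read phase
  int v = r.get(myIdx);                       // any slot may now be read
  (void)v;
}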
int FEM_slave_parallel_part(int fem_mesh,int masterRank,FEM_Comm_t comm_context){
  int myRank;
  MPI_Comm_rank((MPI_Comm)comm_context,&myRank);
  int numChunks;
  MPI_Comm_size((MPI_Comm)comm_context,&numChunks);

  /* Receive the names of the MSA arrays that contain the connectivity information. */
  struct conndata data;
  MPI_Bcast_pup(data,masterRank,(MPI_Comm)comm_context);
  data.arr1.enroll(numChunks);
  data.arr2.enroll(numChunks);
  DEBUG(printf("Recv -> %d \n",data.nelem));

  /* Receive the broken-up mesh from masterRank. It will be used later to give
     each partitioned mesh its elements and data. */
  FEM_Mesh mypiece;
  MPI_Recv_pup(mypiece,masterRank,MESH_CHUNK_TAG,(MPI_Comm)comm_context);

  /* Call ParMETIS and get the results back from it. */
  MSA1DINT::Read rPtr = data.arr1.getInitialWrite().syncToRead();
  MSA1DINT::Read rInd = data.arr2.getInitialWrite().syncToRead();
  struct partconndata *partdata = FEM_call_parmetis(data.nelem, rPtr, rInd, comm_context);

  /* Write to the MSA that contains the partitions to which a node belongs. */
  MSA1DINTLIST nodepart;
  MPI_Bcast_pup(nodepart,masterRank,(MPI_Comm)comm_context);
  nodepart.enroll(numChunks);
  MSA1DINTLIST::Accum nodepartAcc = nodepart.getInitialAccum();
  FEM_write_nodepart(nodepartAcc,partdata,(MPI_Comm)comm_context);

  /* Write to the MSA that stores the nodes that belong to each partition. */
  MSA1DNODELIST part2node;
  MPI_Bcast_pup(part2node,masterRank,(MPI_Comm)comm_context);
  part2node.enroll(numChunks);
  MSA1DNODELIST::Accum part2nodeAcc = part2node.getInitialAccum();
  MSA1DINTLIST::Read nodepartRead = nodepartAcc.syncToRead();
  FEM_write_part2node(nodepartRead, part2nodeAcc, partdata, (MPI_Comm)comm_context);

  /* Get the list of elements and nodes that belong to this partition. */
  MSA1DNODELIST::Read part2nodeRead = part2nodeAcc.syncToRead();
  NodeList lnodes = part2nodeRead.get(myRank);
  lnodes.uniquify();
  // IntList lelems = part2elem.get(myRank);

  /* Get the FEM mesh MSA and write the different meshes. */
  MSA1DFEMMESH part2mesh;
  MPI_Bcast_pup(part2mesh,masterRank,(MPI_Comm)comm_context);
  part2mesh.enroll(numChunks);
  MSA1DFEMMESH::Accum aPart2mesh = part2mesh.getInitialAccum();
  FEM_write_part2mesh(aPart2mesh, partdata, &data, nodepartRead, numChunks, myRank, &mypiece);

  /* Get your mesh, consisting of elements and nodes, out of the mesh MSA. */
  MSA1DFEMMESH::Read rPart2mesh = aPart2mesh.syncToRead();
  MeshElem me = rPart2mesh.get(myRank);
  //printf("[%d] Number of elements in my partitioned mesh %d number of nodes %d \n",myRank,me.m->nElems(),me.m->node.size());

  // Free up the eptr and eind MSA arrays stored in data.
  delete &rPtr;
  delete &rInd;
  data.arr1.FreeMem();
  data.arr2.FreeMem();
  nodepart.FreeMem();

  addIDXLists(me.m,lnodes,myRank);

  /* Receive the user data from the master. */
  MPI_Bcast_pup(me.m->udata,masterRank,(MPI_Comm)comm_context);
  DEBUG(printf("[%d] Length of udata vector %d \n",myRank,me.m->udata.size()));
  delete partdata;

  /* Receive the ghost layer description. */
  struct ghostdata *gdata = new ghostdata;
  MPI_Bcast_pup(*gdata,masterRank,(MPI_Comm)comm_context);
  //printf("[%d] number of ghost layers %d \n",myRank,gdata->numLayers);

  /* Make ghosts. */
  makeGhosts(me.m,(MPI_Comm)comm_context,masterRank,gdata->numLayers,gdata->layers);

  me.m->becomeGetting();
  FEM_chunk *chunk = FEM_chunk::get("FEM_Mesh_Parallel_broadcast");
  int tempMeshNo = chunk->meshes.put(me.m);
  int new_mesh = FEM_Mesh_copy(tempMeshNo);

  part2mesh.FreeMem();
  delete gdata;
  return new_mesh;
}
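/* A minimal sketch (assumed, not taken from the original file) of how a caller
   might dispatch between the two halves above: the rank holding the serial
   mesh runs the master path, every other rank runs the slave path, and each
   rank gets back the handle of its own partitioned mesh. The function name
   FEM_parallel_partition_example is made up for illustration. */
static int FEM_parallel_partition_example(int fem_mesh,int masterRank,FEM_Comm_t comm_context){
  int myRank;
  MPI_Comm_rank((MPI_Comm)comm_context,&myRank);
  if(myRank == masterRank){
    // Master: owns the serial mesh, loads connectivity, broadcasts, partitions.
    return FEM_master_parallel_part(fem_mesh,masterRank,comm_context);
  }else{
    // Slave: mirrors the master's broadcasts, enrolls, and syncs.
    return FEM_slave_parallel_part(fem_mesh,masterRank,comm_context);
  }
}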