// Transfers mesh parts between workers: every worker sends the nodes and
// second-order tetrahedra of its own mesh that fall into the AABB zones
// requested by other workers, and stores the data it receives in the
// corresponding target meshes.
void DataBus::transferNodes(TetrMeshSecondOrder* mesh, vector<AABB>* _reqZones)
{
    // Unpack the per-worker request zones into a dense matrix for convenience
    AABB **reqZones = new AABB*[numberOfWorkers];
    AABB *reqZones_data = new AABB[numberOfWorkers*numberOfWorkers];
    for ( int i = 0; i < numberOfWorkers; ++i ) {
        reqZones[i] = reqZones_data + (i*numberOfWorkers);
    }

    for (int i = 0 ; i < numberOfWorkers; i++)
        for (int j = 0 ; j < numberOfWorkers; j++)
            if( !isinf(_reqZones[i][j].minX) ) {
                reqZones[i][j] = _reqZones[i].at(j);
                LOG_DEBUG("CPU " << i << " asks from CPU " << j << " area: " << reqZones[i][j]);
            }

    auto& engine = Engine::getInstance();
    Body* body = engine.getBodyById( engine.getDispatcher()->getMyBodyId() );
    TetrMeshSecondOrder* myMesh = (TetrMeshSecondOrder*)body->getMeshes();

    int numberOfNodes[numberOfWorkers][numberOfWorkers];
    int numberOfTetrs[numberOfWorkers][numberOfWorkers];
    for (int i = 0 ; i < numberOfWorkers; i++)
        for (int j = 0 ; j < numberOfWorkers; j++) {
            numberOfNodes[i][j] = 0;
            numberOfTetrs[i][j] = 0;
        }

    // Count how many nodes and tetrs we are going to send to every worker
    // and remember their local indices
    map<int,int>* sendNodesMap = new map<int,int>[numberOfWorkers];
    map<int,int>* addNodesMap = new map<int,int>[numberOfWorkers];
    map<int,int>* sendTetrsMap = new map<int,int>[numberOfWorkers];

    for (int i = 0 ; i < numberOfWorkers; i++) {
        if( !isinf(reqZones[i][rank].minX) ) {
            for( int j = 0; j < myMesh->nodesNumber; j++ ) {
                CalcNode& node = myMesh->getNodeByLocalIndex(j);
                if( reqZones[i][rank].isInAABB( node ) ) {
                    numberOfNodes[rank][i]++;
                    sendNodesMap[i][ node.number ] = j;
                }
            }
            for( int j = 0; j < myMesh->tetrsNumber; j++ ) {
                TetrSecondOrder& tetr = myMesh->getTetr2ByLocalIndex(j);
                if( sendNodesMap[i].find(tetr.verts[0]) != sendNodesMap[i].end()
                        || sendNodesMap[i].find(tetr.verts[1]) != sendNodesMap[i].end()
                        || sendNodesMap[i].find(tetr.verts[2]) != sendNodesMap[i].end()
                        || sendNodesMap[i].find(tetr.verts[3]) != sendNodesMap[i].end()
                        || sendNodesMap[i].find(tetr.addVerts[0]) != sendNodesMap[i].end()
                        || sendNodesMap[i].find(tetr.addVerts[1]) != sendNodesMap[i].end()
                        || sendNodesMap[i].find(tetr.addVerts[2]) != sendNodesMap[i].end()
                        || sendNodesMap[i].find(tetr.addVerts[3]) != sendNodesMap[i].end()
                        || sendNodesMap[i].find(tetr.addVerts[4]) != sendNodesMap[i].end()
                        || sendNodesMap[i].find(tetr.addVerts[5]) != sendNodesMap[i].end() )
                {
                    numberOfTetrs[rank][i]++;
                    sendTetrsMap[i][ tetr.number ] = j;
                    // A tetr is sent as a whole, so add the vertices that are
                    // outside the requested zone as well
                    for( int k = 0; k < 4; k++ ) {
                        if( sendNodesMap[i].find(tetr.verts[k]) == sendNodesMap[i].end()
                                && addNodesMap[i].find(tetr.verts[k]) == addNodesMap[i].end() ) {
                            numberOfNodes[rank][i]++;
                            addNodesMap[i][ tetr.verts[k] ] = myMesh->getNodeLocalIndex(tetr.verts[k]);
                        }
                    }
                    for( int k = 0; k < 6; k++ ) {
                        if( sendNodesMap[i].find(tetr.addVerts[k]) == sendNodesMap[i].end()
                                && addNodesMap[i].find(tetr.addVerts[k]) == addNodesMap[i].end() ) {
                            numberOfNodes[rank][i]++;
                            addNodesMap[i][ tetr.addVerts[k] ] = myMesh->getNodeLocalIndex(tetr.addVerts[k]);
                        }
                    }
                }
            }
        }
    }

    BARRIER("DataBus::transferNodes#1");

    // Share the counts so that every worker knows how much data to expect
    MPI::COMM_WORLD.Allgather(
        MPI_IN_PLACE,
        numberOfWorkers, MPI_INT,
        numberOfNodes,
        numberOfWorkers, MPI_INT
    );

    BARRIER("DataBus::transferNodes#2");

    MPI::COMM_WORLD.Allgather(
        MPI_IN_PLACE,
        numberOfWorkers, MPI_INT,
        numberOfTetrs,
        numberOfWorkers, MPI_INT
    );

    BARRIER("DataBus::transferNodes#3");

    for (int i = 0 ; i < numberOfWorkers; i++)
        for (int j = 0 ; j < numberOfWorkers; j++)
            if( numberOfNodes[i][j] != 0 ) {
                LOG_DEBUG("CPU " << i << " is going to send to CPU " << j << " "
                        << numberOfNodes[i][j] << " nodes and " << numberOfTetrs[i][j] << " tetrs");
                // if( rank == j && mesh->getNodesNumber() == 0 )
                //     mesh->createNodes(numberOfNodes[i][j]);
                // if( rank == j && mesh->getTetrsNumber() == 0 )
                //     mesh->createTetrs(numberOfTetrs[i][j]);
            }

    // Post non-blocking receives for incoming nodes and tetrs
    vector<MPI::Request> reqs;
    CalcNode** recNodes = new CalcNode*[numberOfWorkers];
    TetrSecondOrder** recTetrs = new TetrSecondOrder*[numberOfWorkers];
    for( int i = 0; i < numberOfWorkers; i++ ) {
        if( i != rank && numberOfNodes[i][rank] > 0 ) {
            recNodes[i] = new CalcNode[numberOfNodes[i][rank]];
            recTetrs[i] = new TetrSecondOrder[numberOfTetrs[i][rank]];
            reqs.push_back(
                MPI::COMM_WORLD.Irecv(
                    recNodes[i],
                    numberOfNodes[i][rank],
                    MPI_ELNODE_NUMBERED,
                    i,
                    TAG_GET_TETRS_N+i
                )
            );
            reqs.push_back(
                MPI::COMM_WORLD.Irecv(
                    recTetrs[i],
                    numberOfTetrs[i][rank],
                    MPI_TETR_NUMBERED,
                    i,
                    TAG_GET_TETRS_T+i
                )
            );
        } else {
            recNodes[i] = NULL;
            recTetrs[i] = NULL;
        }
    }

    int max_len = 0;
    for (int i = 0; i < numberOfWorkers; i++) {
        if (numberOfNodes[rank][i] > max_len)
            max_len = numberOfNodes[rank][i];
        if (numberOfTetrs[rank][i] > max_len)
            max_len = numberOfTetrs[rank][i];
    }
    int *lens = new int[max_len];
    for (int i = 0; i < max_len; i++)
        lens[i] = 1;

    // Build indexed MPI datatypes over the local storage and send the
    // requested nodes and tetrs in place, without packing them
    MPI::Datatype *n = new MPI::Datatype[numberOfWorkers];
    MPI::Datatype *t = new MPI::Datatype[numberOfWorkers];
    vector<int> displ;
    map<int, int>::const_iterator itr;
    for( int i = 0; i < numberOfWorkers; i++ ) {
        if( i != rank && numberOfNodes[rank][i] > 0 ) {
            displ.clear();
            for( itr = sendNodesMap[i].begin(); itr != sendNodesMap[i].end(); ++itr )
                displ.push_back(itr->second);
            for( itr = addNodesMap[i].begin(); itr != addNodesMap[i].end(); ++itr )
                displ.push_back(itr->second);
            sort( displ.begin(), displ.end() );

            n[i] = MPI_ELNODE_NUMBERED.Create_indexed(numberOfNodes[rank][i], lens, &displ[0]);
            n[i].Commit();

            displ.clear();
            for( itr = sendTetrsMap[i].begin(); itr != sendTetrsMap[i].end(); ++itr )
                displ.push_back(itr->second);
            sort( displ.begin(), displ.end() );

            t[i] = MPI_TETR_NUMBERED.Create_indexed(numberOfTetrs[rank][i], lens, &displ[0]);
            t[i].Commit();

            reqs.push_back(
                MPI::COMM_WORLD.Isend(
                    &(myMesh->nodes[0]),
                    1,
                    n[i],
                    i,
                    TAG_GET_TETRS_N+rank
                )
            );
            reqs.push_back(
                MPI::COMM_WORLD.Isend(
                    &(myMesh->tetrs2[0]), //mesh->getTetrByLocalIndex(0),
                    1,
                    t[i],
                    i,
                    TAG_GET_TETRS_T+rank
                )
            );
        }
    }

    // FIXME - we suppose here that one process will send nodes for one mesh only (!)
    TetrMeshSecondOrder* targetMesh = NULL;

    MPI::Request::Waitall(reqs.size(), &reqs[0]);
    BARRIER("DataBus::transferNodes#4");

    // Add the received nodes and tetrs to the target meshes
    LOG_DEBUG("Processing received data");
    for( int i = 0; i < numberOfWorkers; i++ ) {
        if( i != rank && numberOfNodes[i][rank] > 0 ) {
            LOG_DEBUG("Processing nodes");
            LOG_DEBUG("Worker " << rank << " data from " << i << ". "
                    << "Nodes size " << numberOfNodes[i][rank] << " "
                    << "Tetrs size " << numberOfTetrs[i][rank]);
            for( int j = 0; j < numberOfNodes[i][rank]; j++ ) {
                int num = recNodes[i][j].number;
                unsigned char bodyNum = recNodes[i][j].bodyId;
                targetMesh = (TetrMeshSecondOrder*) engine.getBody(bodyNum)->getMeshes();
                if( targetMesh->getNodesNumber() == 0 ) {
                    targetMesh->createNodes( numberOfNodes[i][rank] );
                    LOG_DEBUG("Nodes storage created for body " << (int)bodyNum
                            << ". Size: " << numberOfNodes[i][rank]);
                }
                if( ! targetMesh->hasNode(num) ) {
                    recNodes[i][j].setPlacement(false);
                    targetMesh->addNode(recNodes[i][j]);
                }
            }
            LOG_DEBUG("Processing tetrs");
            if( targetMesh->getTetrsNumber() == 0 ) {
                targetMesh->createTetrs( numberOfTetrs[i][rank] );
                LOG_DEBUG("Tetrs storage created. Size: " << numberOfTetrs[i][rank]);
            }
            for( int j = 0; j < numberOfTetrs[i][rank]; j++ ) {
                int num = recTetrs[i][j].number;
                if( ! targetMesh->hasTetr(num) ) {
                    targetMesh->addTetr2(recTetrs[i][j]);
                }
            }
        }
    }

    // Release temporary datatypes and buffers
    reqs.clear();
    for( int i = 0; i < numberOfWorkers; i++ ) {
        if( i != rank && numberOfNodes[rank][i] > 0 ) {
            n[i].Free();
            t[i].Free();
        }
        if( recNodes[i] != NULL )
            delete[] recNodes[i];
        if( recTetrs[i] != NULL )
            delete[] recTetrs[i];
    }
    delete[] recNodes;
    delete[] recTetrs;
    delete[] sendNodesMap;
    delete[] addNodesMap;
    delete[] sendTetrsMap;
    delete[] lens;
    delete[] n;
    delete[] t;
    delete[] reqZones_data;
    delete[] reqZones;
    LOG_DEBUG("Nodes transfer done");
}
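/*
 * Illustrative sketch (not part of DataBus): the in-place Allgather pattern used
 * in transferNodes above. With MPI_IN_PLACE each rank contributes the row of the
 * matrix it filled locally (counts[rank][...]) and, after the call, every rank
 * holds the complete matrix. The name `counts` is hypothetical.
 *
 *     int counts[numberOfWorkers][numberOfWorkers];   // only row `rank` is filled locally
 *     MPI::COMM_WORLD.Allgather(MPI_IN_PLACE, numberOfWorkers, MPI_INT,
 *                               counts, numberOfWorkers, MPI_INT);
 *     // counts[i][j] is now identical on every rank
 */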
// Builds the per-pair indexed MPI datatypes used for fast node sync: for every
// (worker, owner) pair the type selects exactly the node entries that the worker
// keeps as remote copies and the owner has to send.
void DataBus::createDynamicTypes(int bodyNum)
{
    LOG_DEBUG("Building dynamic MPI types for fast node sync");
    auto& engine = Engine::getInstance();
    GCMDispatcher* dispatcher = engine.getDispatcher();
    Body* body = engine.getBody(bodyNum);//ById( engine.getDispatcher()->getMyBodyId() );
    TetrMeshSecondOrder* mesh = (TetrMeshSecondOrder*)body->getMeshes();

    // TODO add more cleanup code here to prevent memory leaks
    if (MPI_NODE_TYPES != NULL) {
        LOG_TRACE("Cleaning old types");
        for (int i = 0; i < numberOfWorkers; i++) {
            for (int j = 0; j < numberOfWorkers; j++) {
                LOG_TRACE("Cleaning type " << i << " " << j );
                LOG_TRACE("Size " << i << " " << j << " = " << local_numbers[i][j].size());
                if (local_numbers[i][j].size() > 0)
                    MPI_NODE_TYPES[i][j].Free();
            }
        }
        delete[] MPI_NODE_TYPES;
    }

    if (local_numbers != NULL) {
        for (int i = 0; i < numberOfWorkers; i++)
            delete[] local_numbers[i];
        delete[] local_numbers;
    }

    // FIXME - it's overhead
    local_numbers = new vector<int>*[numberOfWorkers];
    vector<int> **remote_numbers = new vector<int>*[numberOfWorkers];
    MPI_NODE_TYPES = new MPI::Datatype*[numberOfWorkers];

    for (int i = 0; i < numberOfWorkers; i++) {
        local_numbers[i] = new vector<int>[numberOfWorkers];
        remote_numbers[i] = new vector<int>[numberOfWorkers];
        MPI_NODE_TYPES[i] = new MPI::Datatype[numberOfWorkers];
    }

    BARRIER("DataBus::createDynamicTypes#0");

    // find all remote nodes
    for (int j = 0; j < mesh->getNodesNumber(); j++) {
        CalcNode& node = mesh->getNodeByLocalIndex(j);
        if ( node.isRemote() ) {
            //LOG_DEBUG("N: " << j);
            //LOG_DEBUG("R1: " << j << " " << mesh->getBody()->getId());
            int owner = dispatcher->getOwner(node.coords/*, mesh->getBody()->getId()*/);
            //LOG_DEBUG("R2: " << owner);
            assert_ne(owner, rank );
            local_numbers[rank][owner].push_back( mesh->nodesMap[node.number] );
            remote_numbers[rank][owner].push_back(node.number);
        }
    }

    BARRIER("DataBus::createDynamicTypes#1");

    LOG_DEBUG("Requests prepared:");
    for (int i = 0; i < numberOfWorkers; i++)
        for (int j = 0; j < numberOfWorkers; j++)
            LOG_DEBUG("Request size from #" << i << " to #" << j << ": " << local_numbers[i][j].size());

    // sync types: build receive types for our remote copies and tell every
    // owner which of its nodes we need
    unsigned int max_len = 0;
    for (int i = 0; i < numberOfWorkers; i++)
        for (int j = 0; j < numberOfWorkers; j++)
            if (local_numbers[i][j].size() > max_len)
                max_len = local_numbers[i][j].size();

    vector<int> lengths;
    for (unsigned int i = 0; i < max_len; i++)
        lengths.push_back(1);

    int info[3];

    vector<MPI::Request> reqs;

    for (int i = 0; i < numberOfWorkers; i++)
        for (int j = 0; j < numberOfWorkers; j++)
            if (local_numbers[i][j].size() > 0) {
                info[0] = remote_numbers[i][j].size();
                info[1] = i;
                info[2] = j;
                MPI_NODE_TYPES[i][j] = MPI_ELNODE.Create_indexed(
                    local_numbers[i][j].size(),
                    &lengths[0],
                    &local_numbers[i][j][0]
                );
                MPI_NODE_TYPES[i][j].Commit();
                reqs.push_back(
                    MPI::COMM_WORLD.Isend(
                        &remote_numbers[i][j][0],
                        remote_numbers[i][j].size(),
                        MPI::INT,
                        j,
                        TAG_SYNC_NODE_TYPES
                    )
                );
                reqs.push_back(
                    MPI::COMM_WORLD.Isend(
                        info,
                        3,
                        MPI::INT,
                        j,
                        TAG_SYNC_NODE_TYPES_I
                    )
                );
            }

    BARRIER("DataBus::createDynamicTypes#2");

    // Receive the requests addressed to us, remap the global node numbers to
    // our local indices and build the matching send types
    MPI::Status status;

    while (MPI::COMM_WORLD.Iprobe(MPI::ANY_SOURCE, TAG_SYNC_NODE_TYPES_I, status)) {
        MPI::COMM_WORLD.Recv(
            info,
            3,
            MPI::INT,
            status.Get_source(),
            TAG_SYNC_NODE_TYPES_I
        );
        local_numbers[info[1]][info[2]].resize(info[0]);
        MPI::COMM_WORLD.Recv(
            &local_numbers[info[1]][info[2]][0],
            info[0],
            MPI::INT,
            status.Get_source(),
            TAG_SYNC_NODE_TYPES
        );
        if (lengths.size() < (unsigned)info[0])
            for (int i = lengths.size(); i < info[0]; i++)
                lengths.push_back(1);
        for (int i = 0; i < info[0]; i++)
            local_numbers[info[1]][info[2]][i] = mesh->nodesMap[ local_numbers[info[1]][info[2]][i] ];
        MPI_NODE_TYPES[info[1]][info[2]] = MPI_ELNODE.Create_indexed(
            info[0],
            &lengths[0],
            &local_numbers[info[1]][info[2]][0]
        );
        MPI_NODE_TYPES[info[1]][info[2]].Commit();
    }

    MPI::Request::Waitall(reqs.size(), &reqs[0]);
    BARRIER("DataBus::createDynamicTypes#3");

    for (int i = 0 ; i < numberOfWorkers; i++)
        delete[] remote_numbers[i];
    delete[] remote_numbers;
    LOG_DEBUG("Building dynamic MPI types for fast node sync done");
}
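/*
 * Illustrative sketch (not part of DataBus): the Create_indexed pattern used in
 * both functions above. An indexed datatype with unit block lengths selects
 * scattered elements of a contiguous array, so a single send with count 1 of
 * this type moves only the chosen entries without packing them into a temporary
 * buffer. The values below are made up for the example.
 *
 *     double data[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
 *     int displ[3] = { 2, 5, 7 };                  // positions of the wanted elements
 *     int lens[3]  = { 1, 1, 1 };                  // one element per block
 *     MPI::Datatype t = MPI::DOUBLE.Create_indexed(3, lens, displ);
 *     t.Commit();
 *     MPI::COMM_WORLD.Send(data, 1, t, 1, 0);      // to rank 1, tag 0: transfers data[2], data[5], data[7]
 *     t.Free();
 */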