Example #1
void DataBus::transferNodes(TetrMeshSecondOrder* mesh, vector<AABB>* _reqZones)
{
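    // Gather the requested zones into a numberOfWorkers x numberOfWorkers matrix backed by contiguous storage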
    AABB **reqZones = new AABB*[numberOfWorkers];
    AABB *reqZones_data = new AABB[numberOfWorkers*numberOfWorkers];
    for ( int i = 0; i < numberOfWorkers; ++i ) {
        reqZones[i] = reqZones_data + (i*numberOfWorkers);
    }

    for (int i = 0 ; i < numberOfWorkers; i++)
        for (int j = 0 ; j < numberOfWorkers; j++)
            if( !isinf(_reqZones[i][j].minX) )
            {
                reqZones[i][j] = _reqZones[i].at(j);
                LOG_DEBUG("CPU " << i << " asks from CPU " << j << " area: " << reqZones[i][j]);
            }

    auto& engine = Engine::getInstance();
    Body* body = engine.getBodyById( engine.getDispatcher()->getMyBodyId() );
    TetrMeshSecondOrder* myMesh = (TetrMeshSecondOrder*)body->getMeshes();

    int numberOfNodes[numberOfWorkers][numberOfWorkers];
    int numberOfTetrs[numberOfWorkers][numberOfWorkers];
    for (int i = 0 ; i < numberOfWorkers; i++)
        for (int j = 0 ; j < numberOfWorkers; j++)
        {
            numberOfNodes[i][j] = 0;
            numberOfTetrs[i][j] = 0;
        }

    // Count how many nodes and tetrs we are going to send to each worker
    map<int,int>* sendNodesMap = new map<int,int>[numberOfWorkers];
    map<int,int>* addNodesMap = new map<int,int>[numberOfWorkers];
    map<int,int>* sendTetrsMap = new map<int,int>[numberOfWorkers];

    for (int i = 0 ; i < numberOfWorkers; i++)
    {
        if( !isinf(reqZones[i][rank].minX) )
        {
            for( int j = 0; j < myMesh->nodesNumber; j++ )
            {
                CalcNode& node = myMesh->getNodeByLocalIndex(j);
                if( reqZones[i][rank].isInAABB( node ) )
                {
                    numberOfNodes[rank][i]++;
                    sendNodesMap[i][ node.number ] = j;
                }
            }
            for( int j = 0; j < myMesh->tetrsNumber; j++ )
            {
                TetrSecondOrder& tetr = myMesh->getTetr2ByLocalIndex(j);
                if( sendNodesMap[i].find(tetr.verts[0]) != sendNodesMap[i].end()
                    || sendNodesMap[i].find(tetr.verts[1]) != sendNodesMap[i].end()
                    || sendNodesMap[i].find(tetr.verts[2]) != sendNodesMap[i].end()
                    || sendNodesMap[i].find(tetr.verts[3]) != sendNodesMap[i].end()
                        || sendNodesMap[i].find(tetr.addVerts[0]) != sendNodesMap[i].end()
                        || sendNodesMap[i].find(tetr.addVerts[1]) != sendNodesMap[i].end()
                        || sendNodesMap[i].find(tetr.addVerts[2]) != sendNodesMap[i].end()
                        || sendNodesMap[i].find(tetr.addVerts[3]) != sendNodesMap[i].end()
                        || sendNodesMap[i].find(tetr.addVerts[4]) != sendNodesMap[i].end()
                        || sendNodesMap[i].find(tetr.addVerts[5]) != sendNodesMap[i].end() )
                {
                    numberOfTetrs[rank][i]++;
                    sendTetrsMap[i][ tetr.number ] = j;
                    for( int k = 0; k < 4; k++ )
                    {
                        if( sendNodesMap[i].find(tetr.verts[k]) == sendNodesMap[i].end()
                                && addNodesMap[i].find(tetr.verts[k]) == addNodesMap[i].end() )
                        {
                            numberOfNodes[rank][i]++;
                            addNodesMap[i][ tetr.verts[k] ] = myMesh->getNodeLocalIndex(tetr.verts[k]);
                        }
                    }
                    for( int k = 0; k < 6; k++ )
                    {
                        if( sendNodesMap[i].find(tetr.addVerts[k]) == sendNodesMap[i].end()
                                && addNodesMap[i].find(tetr.addVerts[k]) == addNodesMap[i].end() )
                        {
                            numberOfNodes[rank][i]++;
                            addNodesMap[i][ tetr.addVerts[k] ] = myMesh->getNodeLocalIndex(tetr.addVerts[k]);
                        }
                    }
                }
            }
        }
    }

    BARRIER("DataBus::transferNodes#1");
    MPI::COMM_WORLD.Allgather(
        MPI_IN_PLACE,
        numberOfWorkers, MPI_INT,
        numberOfNodes,
        numberOfWorkers, MPI_INT
    );

    BARRIER("DataBus::transferNodes#2");

    MPI::COMM_WORLD.Allgather(
        MPI_IN_PLACE,
        numberOfWorkers, MPI_INT,
        numberOfTetrs,
        numberOfWorkers, MPI_INT
    );

    BARRIER("DataBus::transferNodes#3");

    for (int i = 0 ; i < numberOfWorkers; i++)
        for (int j = 0 ; j < numberOfWorkers; j++)
            if( numberOfNodes[i][j] != 0 )
            {
                LOG_DEBUG("CPU " << i << " is going to send to CPU " << j << " "
                        << numberOfNodes[i][j] << " nodes and " << numberOfTetrs[i][j] << " tetrs");
//                if( rank == j && mesh->getNodesNumber() == 0 )
//                    mesh->createNodes(numberOfNodes[i][j]);
//                if( rank == j && mesh->getTetrsNumber() == 0 )
//                    mesh->createTetrs(numberOfTetrs[i][j]);
            }


    vector<MPI::Request> reqs;
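    // Post non-blocking receives for the nodes and tetrs this worker is about to get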
    CalcNode** recNodes = new CalcNode*[numberOfWorkers];
    TetrSecondOrder** recTetrs = new TetrSecondOrder*[numberOfWorkers];
    for( int i = 0; i < numberOfWorkers; i++ )
    {
        if( i != rank && numberOfNodes[i][rank] > 0 )
        {
            recNodes[i] = new CalcNode[numberOfNodes[i][rank]];
            recTetrs[i] = new TetrSecondOrder[numberOfTetrs[i][rank]];
            reqs.push_back(
                MPI::COMM_WORLD.Irecv(
                    recNodes[i],
                    numberOfNodes[i][rank],
                    MPI_ELNODE_NUMBERED,
                    i,
                    TAG_GET_TETRS_N+i
                )
            );
            reqs.push_back(
                MPI::COMM_WORLD.Irecv(
                    recTetrs[i],
                    numberOfTetrs[i][rank],
                    MPI_TETR_NUMBERED,
                    i,
                    TAG_GET_TETRS_T+i
                )
            );
        }
        else
        {
            recNodes[i] = NULL;
            recTetrs[i] = NULL;
        }
    }

    int max_len = 0;
    for (int i = 0; i< numberOfWorkers; i++)
    {
        if (numberOfNodes[rank][i] > max_len)
            max_len = numberOfNodes[rank][i];
        if (numberOfTetrs[rank][i] > max_len)
            max_len = numberOfTetrs[rank][i];
    }
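    // Every block in the indexed datatypes below is a single element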
    int *lens = new int[max_len];
    for (int i = 0; i < max_len; i++)
        lens[i] = 1;

    // Build indexed datatypes over the local node/tetr storage so the selected elements can be sent in place
    MPI::Datatype *n = new MPI::Datatype[numberOfWorkers];
    MPI::Datatype *t = new MPI::Datatype[numberOfWorkers];
    vector<int> displ;
    map<int, int>::const_iterator itr;
    for( int i = 0; i < numberOfWorkers; i++ )
    {
        if( i != rank && numberOfNodes[rank][i] > 0 )
        {
            displ.clear();
            for( itr = sendNodesMap[i].begin(); itr != sendNodesMap[i].end(); ++itr )
                displ.push_back(itr->second);
            for( itr = addNodesMap[i].begin(); itr != addNodesMap[i].end(); ++itr )
                displ.push_back(itr->second);
            sort( displ.begin(), displ.end() );

            n[i] = MPI_ELNODE_NUMBERED.Create_indexed(numberOfNodes[rank][i], lens, &displ[0]);
            n[i].Commit();

            displ.clear();
            for( itr = sendTetrsMap[i].begin(); itr != sendTetrsMap[i].end(); ++itr )
                displ.push_back(itr->second);
            sort( displ.begin(), displ.end() );

            t[i] = MPI_TETR_NUMBERED.Create_indexed(numberOfTetrs[rank][i], lens, &displ[0]);
            t[i].Commit();

            reqs.push_back(
                MPI::COMM_WORLD.Isend(
                    &(myMesh->nodes[0]),
                    1,
                    n[i],
                    i,
                    TAG_GET_TETRS_N+rank
                )
            );
            reqs.push_back(
                MPI::COMM_WORLD.Isend(
                    &(myMesh->tetrs2[0]),//mesh->getTetrByLocalIndex(0),
                    1,
                    t[i],
                    i,
                    TAG_GET_TETRS_T+rank
                )
            );
        }
    }

    // FIXME - we suppose here that one process will send nodes for one mesh only (!)
    TetrMeshSecondOrder* targetMesh = NULL;
    if( !reqs.empty() )
        MPI::Request::Waitall(reqs.size(), &reqs[0]);
    BARRIER("DataBus::transferNodes#4");
    LOG_DEBUG("Processing received data");
    for( int i = 0; i < numberOfWorkers; i++ )
    {
        if( i != rank && numberOfNodes[i][rank] > 0 )
        {
            LOG_DEBUG("Processing nodes");
            LOG_DEBUG("Worker " << rank << " data from " << i << ". "
                        << "Nodes size " << numberOfNodes[i][rank] << " "
                        << "Tetrs size " << numberOfTetrs[i][rank]);
            for( int j = 0; j < numberOfNodes[i][rank]; j++ )
            {
                int num = recNodes[i][j].number;
                unsigned char bodyNum = recNodes[i][j].bodyId;
                targetMesh = (TetrMeshSecondOrder*) engine.getBody(bodyNum)->getMeshes();
                if( targetMesh->getNodesNumber() == 0 )
                {
                    targetMesh->createNodes( numberOfNodes[i][rank] );
                    LOG_DEBUG("Nodes storage created for body " << (int)bodyNum << ". Size: " << numberOfNodes[i][rank]);
                }
                if( ! targetMesh->hasNode(num) )
                {
                    recNodes[i][j].setPlacement(false);
                    targetMesh->addNode(recNodes[i][j]);
                }
            }
            LOG_DEBUG("Processing tetrs");
            if( targetMesh->getTetrsNumber() == 0 )
            {
                targetMesh->createTetrs( numberOfTetrs[i][rank] );
                LOG_DEBUG("Tetrs storage created. Size: " << numberOfTetrs[i][rank]);
            }
            for( int j = 0; j < numberOfTetrs[i][rank]; j++ )
            {
                int num = recTetrs[i][j].number;
                if( ! targetMesh->hasTetr(num) )
                {
                    targetMesh->addTetr2(recTetrs[i][j]);
                }
            }
        }
    }
    reqs.clear();
    for( int i = 0; i < numberOfWorkers; i++ )
    {
        if( i != rank && numberOfNodes[rank][i] > 0 )
        {
            n[i].Free();
            t[i].Free();
        }
        if( recNodes[i] != NULL )
            delete[] recNodes[i];
        if( recTetrs[i] != NULL )
            delete[] recTetrs[i];
    }
    delete[] recNodes;
    delete[] recTetrs;
    delete[] sendNodesMap;
    delete[] addNodesMap;
    delete[] sendTetrsMap;
    delete[] lens;
    delete[] n;
    delete[] t;
    delete[] reqZones_data;
    delete[] reqZones;
    LOG_DEBUG("Nodes transfer done");
}
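The core trick in transferNodes is the indexed MPI datatype: instead of copying the selected nodes and tetrahedra into a packing buffer, the sender describes their positions inside its existing storage and lets MPI pick them up in place. Below is a minimal standalone sketch of that pattern, not the project's code: it uses the plain C MPI API rather than the older C++ bindings seen above, and the tag and array contents are purely illustrative.

#include <mpi.h>
#include <cstdio>

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (size < 2) { MPI_Finalize(); return 0; }   // needs at least two workers

    const int TAG_NODES = 10;                     // illustrative tag

    if (rank == 0) {
        double storage[10];
        for (int i = 0; i < 10; i++) storage[i] = i;

        // Describe the scattered elements to send (compare the sorted displ vector above)
        int blocklens[3] = { 1, 1, 1 };
        int displs[3]    = { 1, 4, 7 };
        MPI_Datatype selected;
        MPI_Type_indexed(3, blocklens, displs, MPI_DOUBLE, &selected);
        MPI_Type_commit(&selected);

        // One send of one "selected" element ships the three doubles straight from storage
        MPI_Send(storage, 1, selected, 1, TAG_NODES, MPI_COMM_WORLD);
        MPI_Type_free(&selected);
    } else if (rank == 1) {
        // The receiver sees them as a plain contiguous array
        double buf[3];
        MPI_Recv(buf, 3, MPI_DOUBLE, 0, TAG_NODES, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("received %.0f %.0f %.0f\n", buf[0], buf[1], buf[2]);
    }

    MPI_Finalize();
    return 0;
}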
Example #2
void DataBus::createDynamicTypes(int bodyNum)
{
    LOG_DEBUG("Building dynamic MPI types for fast node sync");
    auto& engine = Engine::getInstance();
    GCMDispatcher* dispatcher = engine.getDispatcher();
    Body* body = engine.getBody(bodyNum);//ById( engine.getDispatcher()->getMyBodyId() );
    TetrMeshSecondOrder* mesh = (TetrMeshSecondOrder*)body->getMeshes();

    // TODO add more cleanup code here to prevent memory leaks
    if (MPI_NODE_TYPES != NULL) {
        LOG_TRACE("Cleaning old types");
        for (int i = 0; i < numberOfWorkers; i++)
        {
            for (int j = 0; j < numberOfWorkers; j++)
            {
                LOG_TRACE("Cleaning type " << i << " " << j );
                LOG_TRACE("Size " << i << " " << j << " = " << local_numbers[i][j].size());
                if (local_numbers[i][j].size() > 0)
                    MPI_NODE_TYPES[i][j].Free();
            }
        }
        delete[] MPI_NODE_TYPES;
    }

    if (local_numbers != NULL) {
        for (int i = 0; i < numberOfWorkers; i++)
            delete[] local_numbers[i];
        delete[] local_numbers;
    }

    // FIXME - it's overhead
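    // local_numbers[i][j] holds, in this worker's own node indexing, the nodes that worker i reads from worker j;
    // MPI_NODE_TYPES[i][j] is the matching indexed datatype, remote_numbers keeps the corresponding global node numbers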
    local_numbers = new vector<int>*[numberOfWorkers];
    vector<int> **remote_numbers = new vector<int>*[numberOfWorkers];
    MPI_NODE_TYPES = new MPI::Datatype*[numberOfWorkers];

    for (int i = 0; i < numberOfWorkers; i++)
    {
        local_numbers[i] = new vector<int>[numberOfWorkers];
        remote_numbers[i] = new vector<int>[numberOfWorkers];
        MPI_NODE_TYPES[i] = new MPI::Datatype[numberOfWorkers];
    }

    BARRIER("DataBus::createDynamicTypes#0");

    // find all remote nodes
    for (int j = 0; j < mesh->getNodesNumber(); j++)
    {
        CalcNode& node = mesh->getNodeByLocalIndex(j);
        if ( node.isRemote() )
        {
            //LOG_DEBUG("N: " << j);
            //LOG_DEBUG("R1: " << j << " " << mesh->getBody()->getId());
            int owner = dispatcher->getOwner(node.coords/*, mesh->getBody()->getId()*/);
            //LOG_DEBUG("R2: " << owner);
            assert_ne(owner, rank );
            local_numbers[rank][owner].push_back( mesh->nodesMap[node.number] );
            remote_numbers[rank][owner].push_back(node.number);
        }
    }

    BARRIER("DataBus::createDynamicTypes#1");

    LOG_DEBUG("Requests prepared:");
    for (int i = 0; i < numberOfWorkers; i++)
        for (int j = 0; j < numberOfWorkers; j++)
            LOG_DEBUG("Request size from #" << i << " to #" << j << ": " << local_numbers[i][j].size());

    // sync types
    unsigned int max_len = 0;
    for (int i = 0; i < numberOfWorkers; i++)
        for (int j = 0; j < numberOfWorkers; j++)
            if (local_numbers[i][j].size() > max_len)
                max_len = local_numbers[i][j].size();

    vector<int> lengths;
    for (unsigned int i = 0; i < max_len; i++)
        lengths.push_back(1);

    // info = { length of the index list, worker that needs the nodes, worker that owns them }
    int info[3];

    vector<MPI::Request> reqs;

    for (int i = 0; i < numberOfWorkers; i++)
        for (int j = 0; j < numberOfWorkers; j++)
            if (local_numbers[i][j].size() > 0)
            {
                info[0] = remote_numbers[i][j].size();
                info[1] = i;
                info[2] = j;
                MPI_NODE_TYPES[i][j] =  MPI_ELNODE.Create_indexed(
                    local_numbers[i][j].size(),
                    &lengths[0],
                    &local_numbers[i][j][0]
                );
                MPI_NODE_TYPES[i][j].Commit();
                reqs.push_back(
                    MPI::COMM_WORLD.Isend(
                        &remote_numbers[i][j][0],
                        remote_numbers[i][j].size(),
                        MPI::INT,
                        j,
                        TAG_SYNC_NODE_TYPES
                    )
                );
                reqs.push_back(
                    MPI::COMM_WORLD.Isend(
                        info,
                        3,
                        MPI::INT,
                        j,
                        TAG_SYNC_NODE_TYPES_I
                    )
                );
            }

    BARRIER("DataBus::createDynamicTypes#2");

    MPI::Status status;

    // Receive the node numbers other workers need from this one, remap them to local indices and build the matching datatypes
    while (MPI::COMM_WORLD.Iprobe(MPI::ANY_SOURCE, TAG_SYNC_NODE_TYPES_I, status))
    {
        MPI::COMM_WORLD.Recv(
            info,
            3,
            MPI::INT,
            status.Get_source(),
            TAG_SYNC_NODE_TYPES_I
        );
        local_numbers[info[1]][info[2]].resize(info[0]);
        MPI::COMM_WORLD.Recv(
            &local_numbers[info[1]][info[2]][0],
            info[0],
            MPI::INT,
            status.Get_source(),
            TAG_SYNC_NODE_TYPES
        );
        if (lengths.size() < (unsigned)info[0])
            for (int i = lengths.size(); i < info[0]; i++)
                lengths.push_back(1);
        for(int i = 0; i < info[0]; i++)
            local_numbers[info[1]][info[2]][i] = mesh->nodesMap[ local_numbers[info[1]][info[2]][i] ];
        MPI_NODE_TYPES[info[1]][info[2]] =  MPI_ELNODE.Create_indexed(
            info[0],
            &lengths[0],
            &local_numbers[info[1]][info[2]][0]
        );
        MPI_NODE_TYPES[info[1]][info[2]].Commit();
    }

    if (!reqs.empty())
        MPI::Request::Waitall(reqs.size(), &reqs[0]);
    BARRIER("DataBus::createDynamicTypes#3");

    for (int i = 0 ; i < numberOfWorkers; i++)
        delete[] remote_numbers[i];
    delete[] remote_numbers;
    LOG_DEBUG("Building dynamic MPI types for fast node sync done");
}
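The receive loop in createDynamicTypes probes for incoming messages from any source and only then posts the matching receives. A minimal standalone sketch of that probe-driven pattern follows, not the project's code: it uses the plain C MPI API and sizes the buffer with MPI_Get_count instead of the separate info message used above; the tag and payload are illustrative.

#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    const int TAG_LIST = 42;                      // illustrative tag

    if (rank != 0) {
        // Every non-root worker sends a list whose length depends on its rank
        std::vector<int> list(rank + 1, rank);
        MPI_Send(list.data(), (int)list.size(), MPI_INT, 0, TAG_LIST, MPI_COMM_WORLD);
    } else {
        // Root receives one list per worker without knowing the lengths up front
        for (int msg = 0; msg < size - 1; msg++) {
            MPI_Status status;
            MPI_Probe(MPI_ANY_SOURCE, TAG_LIST, MPI_COMM_WORLD, &status);
            int count;
            MPI_Get_count(&status, MPI_INT, &count);
            std::vector<int> list(count);
            MPI_Recv(list.data(), count, MPI_INT, status.MPI_SOURCE, TAG_LIST,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            printf("root got %d ints from rank %d\n", count, status.MPI_SOURCE);
        }
    }

    MPI_Finalize();
    return 0;
}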