Example #1
// This routine sets up the IloCplex algorithm used to solve the worker LP and
// creates the worker LP itself (i.e., the dual of the flow constraints and
// capacity constraints of the flow MILP).
//
// Modeling variables:
// forall k in V0, i in V:
//    u(k,i) = dual variable associated with flow constraint (k,i)
//
// forall k in V0, forall (i,j) in A:
//    v(k,i,j) = dual variable associated with capacity constraint (k,i,j)
//
// Objective:
// minimize sum(k in V0) sum((i,j) in A) x(i,j) * v(k,i,j)
//          - sum(k in V0) u(k,0) + sum(k in V0) u(k,k)
//
// Constraints:
// forall k in V0, forall (i,j) in A: u(k,i) - u(k,j) <= v(k,i,j)
//
// Nonnegativity on variables v(k,i,j)
// forall k in V0, forall (i,j) in A: v(k,i,j) >= 0
//
void
createWorkerLP(IloCplex cplex, IloNumVarArray v, IloNumVarArray u, 
               IloObjective obj, IloInt numNodes)
{

   IloInt i, j, k;
   IloEnv env = cplex.getEnv();
   IloModel mod(env, "atsp_worker"); 

   // Set up IloCplex algorithm to solve the worker LP

   cplex.extract(mod);
   cplex.setOut(env.getNullStream());
      
   // Turn off the presolve reductions and set the CPLEX optimizer
   // to solve the worker LP with primal simplex method.

   cplex.setParam(IloCplex::Reduce, 0);
   cplex.setParam(IloCplex::RootAlg, IloCplex::Primal); 
   
   // Create variables v(k,i,j) forall k in V0, (i,j) in A.
   // For simplicity, dummy variables v(k,i,i) are also created.
   // They are fixed to 0 and do not participate in the constraints.

   IloInt numArcs  = numNodes * numNodes;
   IloInt vNumVars = (numNodes-1) * numArcs;
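   // v(k,i,j) is stored at flat index (k-1)*numArcs + i*numNodes + j, so the
   // variables of each commodity k occupy one contiguous block of numArcs entries.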
   IloNumVarArray vTemp(env, vNumVars, 0, IloInfinity);
   for (k = 1; k < numNodes; ++k) {
      for (i = 0; i < numNodes; ++i) {
         vTemp[(k-1)*numArcs + i *numNodes + i].setBounds(0, 0);
      }
   }
   v.clear();
   v.add(vTemp);
   vTemp.end();
   mod.add(v);

   // Set names for variables v(k,i,j) 

   for (k = 1; k < numNodes; ++k) {
      for(i = 0; i < numNodes; ++i) {
         for(j = 0; j < numNodes; ++j) {
            char varName[100];
            sprintf(varName, "v.%d.%d.%d", (int) k, (int) i, (int) j); 
            v[(k-1)*numArcs + i*numNodes + j].setName(varName);
         }
      }
   }
   
   // Associate indices to variables v(k,i,j)
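   // (each variable records its own position via setObject(); downstream code,
   // for example the routine that separates Benders cuts, can retrieve that
   // position with getObject() and map dual values back to the corresponding arc)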

   IloIntArray vIndex(env, vNumVars);
   for (j = 0; j < vNumVars; ++j)
   {
      vIndex[j] = j;
      v[j].setObject(&vIndex[j]);
   }

   // Create variables u(k,i) forall k in V0, i in V

   IloInt uNumVars = (numNodes-1) * numNodes;
   IloNumVarArray uTemp(env, uNumVars, -IloInfinity, IloInfinity);
   u.clear();
   u.add(uTemp);
   uTemp.end();
   mod.add(u);

   // Set names for variables u(k,i) 

   for (k = 1; k < numNodes; ++k) {
      for(i = 0; i < numNodes; ++i) {
         char varName[100];
         sprintf(varName, "u.%d.%d", (int) k, (int) i); 
         u[(k-1)*numNodes + i].setName(varName);
      }
   }

   // Associate indices to variables u(k,i)

   IloIntArray uIndex(env, uNumVars);
   for (j = 0; j < uNumVars; ++j)
   {
      uIndex[j] = vNumVars + j;
      u[j].setObject(&uIndex[j]);
   }

   // Initial objective function is empty
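   // (the x(i,j) coefficients of the objective stated in the header comment are
   // presumably installed later, once a master solution is available)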

   obj.setSense(IloObjective::Minimize);
   mod.add(obj);

   // Add constraints:
   // forall k in V0, forall (i,j) in A: u(k,i) - u(k,j) <= v(k,i,j)

   for (k = 1; k < numNodes; ++k) {
      for(i = 0; i < numNodes; ++i) {
         for(j = 0; j < numNodes; ++j) {
            if ( i != j ) {
               IloExpr expr(env);
               expr -= v[(k-1)*numArcs + i*(numNodes) + j];
               expr += u[(k-1)*numNodes + i];
               expr -= u[(k-1)*numNodes + j];
               mod.add(expr <= 0);
               expr.end();
            }
         }
      }
   }

}// END createWorkerLP
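
// The following is a minimal, hypothetical usage sketch (not part of the
// original example): it assumes the standard Concert/CPLEX header
// <ilcplex/ilocplex.h> and shows how a caller might build the worker LP.
// Filling in the objective coefficients from a master solution and calling
// solve() would be handled elsewhere, e.g. by the Benders separation logic.

void
buildWorkerExample(IloEnv env, IloInt numNodes)
{
   IloCplex workerCplex(env);          // algorithm object for the worker LP
   IloNumVarArray v(env);              // dual variables v(k,i,j)
   IloNumVarArray u(env);              // dual variables u(k,i)
   IloObjective obj(env);              // empty objective, filled in later

   createWorkerLP(workerCplex, v, u, obj, numNodes);

   // ... set the x(i,j) coefficients on obj and call workerCplex.solve()
}// END buildWorkerExample
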
Example #2
void GrayScott::step()
{
    // update step
    if (world.rank == 0) {
        ++currStep_;
    }
    
    
    MPI_Request request[8];
    MPI_Status status[8];
    
    // exchange boundaries along y-direction
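    // nonblocking halo exchange of the top/bottom ghost rows for u_ and v_;
    // the matching MPI_Waitall is issued only after the interior rows have
    // been processed, so communication overlaps with computation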
    if (world.coord_x % 2 == 0) { // exchange with the bottom neighbor first, then the top
        
        MPI_Isend(&u_[(Nx_loc)*(Ny_loc)],   1, bottom_boundary, world.bottom_proc, TAG, cart_comm, &request[0]);
        MPI_Irecv(&u_[(Nx_loc+1)*(Ny_loc)], 1, bottom_boundary, world.bottom_proc, TAG, cart_comm, &request[1]);
        MPI_Isend(&u_[(Ny_loc)],            1, top_boundary,    world.top_proc,    TAG, cart_comm, &request[2]);
        MPI_Irecv(&u_[0],                   1, top_boundary,    world.top_proc,    TAG, cart_comm, &request[3]);
        
        MPI_Isend(&v_[(Nx_loc)*(Ny_loc)],   1, bottom_boundary, world.bottom_proc, TAG, cart_comm, &request[4]);
        MPI_Irecv(&v_[(Nx_loc+1)*(Ny_loc)], 1, bottom_boundary, world.bottom_proc, TAG, cart_comm, &request[5]);
        MPI_Isend(&v_[(Ny_loc)],            1, top_boundary,    world.top_proc,    TAG, cart_comm, &request[6]);
        MPI_Irecv(&v_[0],                   1, top_boundary,    world.top_proc,    TAG, cart_comm, &request[7]);
    }
    else { // exchange with the top neighbor first, then the bottom
        
        MPI_Irecv(&u_[0],                   1, top_boundary,    world.top_proc,    TAG, cart_comm, &request[0]);
        MPI_Isend(&u_[(Ny_loc)],            1, top_boundary,    world.top_proc,    TAG, cart_comm, &request[1]);
        MPI_Irecv(&u_[(Nx_loc+1)*(Ny_loc)], 1, bottom_boundary, world.bottom_proc, TAG, cart_comm, &request[2]);
        MPI_Isend(&u_[(Nx_loc)*(Ny_loc)],   1, bottom_boundary, world.bottom_proc, TAG, cart_comm, &request[3]);
        
        MPI_Irecv(&v_[0],                   1, top_boundary,    world.top_proc,    TAG, cart_comm, &request[4]);
        MPI_Isend(&v_[(Ny_loc)],            1, top_boundary,    world.top_proc,    TAG, cart_comm, &request[5]);
        MPI_Irecv(&v_[(Nx_loc+1)*(Ny_loc)], 1, bottom_boundary, world.bottom_proc, TAG, cart_comm, &request[6]);
        MPI_Isend(&v_[(Nx_loc)*(Ny_loc)],   1, bottom_boundary, world.bottom_proc, TAG, cart_comm, &request[7]);
    }
    
    // u and v at the half step
    std::vector<double> uTemp(u_.size());
    std::vector<double> vTemp(v_.size());
    
    // right hand sides for u and for v
    std::vector<double> uRhs(N_);
    std::vector<double> vRhs(N_);
    
    /****************** DIFFUSION (ADI) ***************************************/
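    
    // ADI: each half-step is implicit along the locally contiguous direction
    // (the tridiagonal solves with matU1_/matV1_) and explicit along the other
    // direction (the three-point stencil used to build the right-hand sides);
    // a global transpose in between swaps the two directions.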
    
    // perform the first half-step
    // loop over all rows
    
    
    // the rows are distributed across MPI ranks; the outer loop over the local
    // rows is parallelized with OpenMP below (the inner loop is left serial)
    
    // inner grid points
    #pragma omp parallel num_threads(nthreads_)
    {
    std::vector<double> puRhs(N_);
    std::vector<double> pvRhs(N_);
    #pragma omp for
    for (int i=1; i<Nx_loc-1; ++i) {
        // create right-hand side of the systems
        for (int j=0; j<N_; ++j) {
            puRhs[j] = U(i,j) + uCoeff * (U(i+1,j) - 2.*U(i,j) + U(i-1,j));
            pvRhs[j] = V(i,j) + vCoeff * (V(i+1,j) - 2.*V(i,j) + V(i-1,j));
        }
        
        TriDiagMatrixSolver::solve(N_, matU1_, puRhs, &UTEMP(i,0), 1);
        TriDiagMatrixSolver::solve(N_, matV1_, pvRhs, &VTEMP(i,0), 1);
    }
    } // omp parallel
    
    // wait for boundaries to arrive
    MPI_Waitall(8,request,status);
    
    
    // update local boundaries

    if (world.rank == 0) {
        // i=0 local and global
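        // zero-flux boundary: the missing U(-1,j) term of the stencil is
        // replaced by U(0,j), so it reduces to U(1,j) - U(0,j)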
        for (int j=0; j<N_; ++j) {
            uRhs[j] = U(0,j) + uCoeff * (U(1,j) - U(0,j));
            vRhs[j] = V(0,j) + vCoeff * (V(1,j) - V(0,j));
        }
        TriDiagMatrixSolver::solve(N_, matU1_, uRhs, &UTEMP(0,0), 1);
        TriDiagMatrixSolver::solve(N_, matV1_, vRhs, &VTEMP(0,0), 1);
    }
    else {
        // i=0 local
        for (int j=0; j<N_; ++j) {
            uRhs[j] = U(0,j) + uCoeff * (U(0+1,j) - 2.*U(0,j) + U(0-1,j));
            vRhs[j] = V(0,j) + vCoeff * (V(0+1,j) - 2.*V(0,j) + V(0-1,j));
        }
        
        TriDiagMatrixSolver::solve(N_, matU1_, uRhs, &UTEMP(0,0), 1);
        TriDiagMatrixSolver::solve(N_, matV1_, vRhs, &VTEMP(0,0), 1);
    }
    
    
    if (world.rank == world.size-1) {
        // i=Nx_loc-1 local and i=N_-1 global
        for (int j=0; j<N_; ++j) {
            uRhs[j] = U(Nx_loc-1,j) + uCoeff * (- U(Nx_loc-1,j) + U(Nx_loc-2,j));
            vRhs[j] = V(Nx_loc-1,j) + vCoeff * (- V(Nx_loc-1,j) + V(Nx_loc-2,j));
        }
        TriDiagMatrixSolver::solve(N_, matU1_, uRhs, &UTEMP(Nx_loc-1,0), 1);
        TriDiagMatrixSolver::solve(N_, matV1_, vRhs, &VTEMP(Nx_loc-1,0), 1);
    }
    else {
        // i=Nx_loc-1 local
        for (int j=0; j<N_; ++j) {
            uRhs[j] = U(Nx_loc-1,j) + uCoeff * (U(Nx_loc-1+1,j) - 2.*U(Nx_loc-1,j) + U(Nx_loc-1-1,j));
            vRhs[j] = V(Nx_loc-1,j) + vCoeff * (V(Nx_loc-1+1,j) - 2.*V(Nx_loc-1,j) + V(Nx_loc-1-1,j));
        }
        
        TriDiagMatrixSolver::solve(N_, matU1_, uRhs, &UTEMP(Nx_loc-1,0), 1);
        TriDiagMatrixSolver::solve(N_, matV1_, vRhs, &VTEMP(Nx_loc-1,0), 1);
    }
    
    
    MPI_Barrier(MPI_COMM_WORLD);
    
    
    // transpose matrix
    
    // TODO: either use the send datatype for the receive as well and then
    // transpose locally, or use a dedicated receive datatype; test which is faster
    
    // transpose global blocks (send from uTemp to u_)
    // start at Ny_loc, because we ignore the ghost cells
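    // (after the transpose the other spatial direction becomes the locally
    // contiguous one, so the second implicit sweep can reuse the same
    // row-wise tridiagonal solves)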
    
    if (localtranspose_) {
        MPI_Alltoall(&uTemp[Ny_loc], 1, block_resized_send, &u_[Ny_loc], 1, block_resized_send, MPI_COMM_WORLD);
        MPI_Alltoall(&vTemp[Ny_loc], 1, block_resized_send, &v_[Ny_loc], 1, block_resized_send, MPI_COMM_WORLD);
        
        // locally transpose blocks
        #pragma omp parallel num_threads(nthreads_)// for private(ind1) private(ind2)
        {
        int ind1, ind2;
        #pragma omp for
        for (int b=0; b<Nb_loc; ++b) {
            for (int i=0; i<Nx_loc; ++i) {
                for (int j=0; j<i; ++j) {
                    ind1 = (i+1)*Ny_loc + j + b*Nx_loc; // regular index + offset of block
                    ind2 = (j+1)*Ny_loc + i + b*Nx_loc; // switch i and j
                    
                    std::swap(u_[ind1], u_[ind2]);
                    std::swap(v_[ind1], v_[ind2]);
                }
            }
        }
        } // omp parallel
    }
    else {
        MPI_Alltoall(&uTemp[Ny_loc], 1, block_resized_send, &u_[Ny_loc], 1, block_resized_recv, MPI_COMM_WORLD);
        MPI_Alltoall(&vTemp[Ny_loc], 1, block_resized_send, &v_[Ny_loc], 1, block_resized_recv, MPI_COMM_WORLD);
    }
    
    // exchange new boundaries
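    // (the ghost rows still hold pre-transpose data, so they are refreshed
    // before the second half-step reads its i-1 / i+1 neighbors)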
    if (world.coord_x % 2 == 0) { // exchange with the bottom neighbor first, then the top
        
        MPI_Isend(&u_[(Nx_loc)*(Ny_loc)],   1, bottom_boundary, world.bottom_proc, TAG, cart_comm, &request[0]);
        MPI_Irecv(&u_[(Nx_loc+1)*(Ny_loc)], 1, bottom_boundary, world.bottom_proc, TAG, cart_comm, &request[1]);
        MPI_Isend(&u_[(Ny_loc)],            1, top_boundary,    world.top_proc,    TAG, cart_comm, &request[2]);
        MPI_Irecv(&u_[0],                   1, top_boundary,    world.top_proc,    TAG, cart_comm, &request[3]);
        
        MPI_Isend(&v_[(Nx_loc)*(Ny_loc)],   1, bottom_boundary, world.bottom_proc, TAG, cart_comm, &request[4]);
        MPI_Irecv(&v_[(Nx_loc+1)*(Ny_loc)], 1, bottom_boundary, world.bottom_proc, TAG, cart_comm, &request[5]);
        MPI_Isend(&v_[(Ny_loc)],            1, top_boundary,    world.top_proc,    TAG, cart_comm, &request[6]);
        MPI_Irecv(&v_[0],                   1, top_boundary,    world.top_proc,    TAG, cart_comm, &request[7]);
    }
    else { // exchange with the top neighbor first, then the bottom
        
        MPI_Irecv(&u_[0],                   1, top_boundary,    world.top_proc,    TAG, cart_comm, &request[0]);
        MPI_Isend(&u_[(Ny_loc)],            1, top_boundary,    world.top_proc,    TAG, cart_comm, &request[1]);
        MPI_Irecv(&u_[(Nx_loc+1)*(Ny_loc)], 1, bottom_boundary, world.bottom_proc, TAG, cart_comm, &request[2]);
        MPI_Isend(&u_[(Nx_loc)*(Ny_loc)],   1, bottom_boundary, world.bottom_proc, TAG, cart_comm, &request[3]);
        
        MPI_Irecv(&v_[0],                   1, top_boundary,    world.top_proc,    TAG, cart_comm, &request[4]);
        MPI_Isend(&v_[(Ny_loc)],            1, top_boundary,    world.top_proc,    TAG, cart_comm, &request[5]);
        MPI_Irecv(&v_[(Nx_loc+1)*(Ny_loc)], 1, bottom_boundary, world.bottom_proc, TAG, cart_comm, &request[6]);
        MPI_Isend(&v_[(Nx_loc)*(Ny_loc)],   1, bottom_boundary, world.bottom_proc, TAG, cart_comm, &request[7]);
    }
    
    // inner grid points
    #pragma omp parallel num_threads(nthreads_)
    {
    std::vector<double> puRhs(N_);
    std::vector<double> pvRhs(N_);
    #pragma omp for
    for (int i=1; i<Nx_loc-1; ++i) {
        // create right-hand side of the systems
        for (int j=0; j<N_; ++j) {
            puRhs[j] = U(i,j) + uCoeff * (U(i+1,j) - 2.*U(i,j) + U(i-1,j));
            pvRhs[j] = V(i,j) + vCoeff * (V(i+1,j) - 2.*V(i,j) + V(i-1,j));
        }
        
        TriDiagMatrixSolver::solve(N_, matU1_, puRhs, &UTEMP(i,0), 1);
        TriDiagMatrixSolver::solve(N_, matV1_, pvRhs, &VTEMP(i,0), 1);
    }
    } // omp parallel
    
    
    // wait for boundaries to arrive
    MPI_Waitall(8,request,status);
    
    
    // update local boundaries

    // top
    if (world.rank == 0) {
        // i=0 local and global
        for (int j=0; j<N_; ++j) {
            uRhs[j] = U(0,j) + uCoeff * (U(1,j) - U(0,j));
            vRhs[j] = V(0,j) + vCoeff * (V(1,j) - V(0,j));
        }
        TriDiagMatrixSolver::solve(N_, matU1_, uRhs, &UTEMP(0,0), 1);
        TriDiagMatrixSolver::solve(N_, matV1_, vRhs, &VTEMP(0,0), 1);
    }
    else {
        // i=0 local, but not globally
        for (int j=0; j<N_; ++j) {
            uRhs[j] = U(0,j) + uCoeff * (U(0+1,j) - 2.*U(0,j) + U(0-1,j));
            vRhs[j] = V(0,j) + vCoeff * (V(0+1,j) - 2.*V(0,j) + V(0-1,j));
        }
        
        TriDiagMatrixSolver::solve(N_, matU1_, uRhs, &UTEMP(0,0), 1);
        TriDiagMatrixSolver::solve(N_, matV1_, vRhs, &VTEMP(0,0), 1);
    }
    
    // bottom
    if (world.rank == world.size-1) {
        // i=Nx_loc-1 local and i=N_-1 global
        for (int j=0; j<N_; ++j) {
            uRhs[j] = U(Nx_loc-1,j) + uCoeff * (- U(Nx_loc-1,j) + U(Nx_loc-2,j));
            vRhs[j] = V(Nx_loc-1,j) + vCoeff * (- V(Nx_loc-1,j) + V(Nx_loc-2,j));
        }
        TriDiagMatrixSolver::solve(N_, matU1_, uRhs, &UTEMP(Nx_loc-1,0), 1);
        TriDiagMatrixSolver::solve(N_, matV1_, vRhs, &VTEMP(Nx_loc-1,0), 1);
    }
    else {
        // i=Nx_loc-1 local
        for (int j=0; j<N_; ++j) {
            uRhs[j] = U(Nx_loc-1,j) + uCoeff * (U(Nx_loc-1+1,j) - 2.*U(Nx_loc-1,j) + U(Nx_loc-1-1,j));
            vRhs[j] = V(Nx_loc-1,j) + vCoeff * (V(Nx_loc-1+1,j) - 2.*V(Nx_loc-1,j) + V(Nx_loc-1-1,j));
        }
        
        TriDiagMatrixSolver::solve(N_, matU1_, uRhs, &UTEMP(Nx_loc-1,0), 1);
        TriDiagMatrixSolver::solve(N_, matV1_, vRhs, &VTEMP(Nx_loc-1,0), 1);
    }
    
    MPI_Barrier(MPI_COMM_WORLD);
    
    // transpose back
    
    // transpose global blocks (send from uTemp to u_)
    // start at Ny_loc, because we ignore the ghost cells
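    // (this second transpose restores the original orientation of u_ and v_
    // before the reaction step below)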
    
    if (localtranspose_) {
        MPI_Alltoall(&uTemp[Ny_loc], 1, block_resized_send, &u_[Ny_loc], 1, block_resized_send, MPI_COMM_WORLD);
        MPI_Alltoall(&vTemp[Ny_loc], 1, block_resized_send, &v_[Ny_loc], 1, block_resized_send, MPI_COMM_WORLD);
        
        // locally transpose blocks
        #pragma omp parallel num_threads(nthreads_)// for private(ind1) private(ind2)
        {
        int ind1, ind2;
        #pragma omp for
        for (int b=0; b<Nb_loc; ++b) {
            for (int i=0; i<Nx_loc; ++i) {
                for (int j=0; j<i; ++j) {
                    ind1 = (i+1)*Ny_loc + j + b*Nx_loc; // regular index + offset of block
                    ind2 = (j+1)*Ny_loc + i + b*Nx_loc; // switch i and j
                    
                    std::swap(u_[ind1], u_[ind2]);
                    std::swap(v_[ind1], v_[ind2]);
                }
            }
        }
        } // omp parallel
    }
    else {
        MPI_Alltoall(&uTemp[Ny_loc], 1, block_resized_send, &u_[Ny_loc], 1, block_resized_recv, MPI_COMM_WORLD);
        MPI_Alltoall(&vTemp[Ny_loc], 1, block_resized_send, &v_[Ny_loc], 1, block_resized_recv, MPI_COMM_WORLD);
    }
    
    /****************** REACTION **********************************************/
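
    // forward Euler update of the Gray-Scott kinetics:
    //   du/dt = -u*v*v + F*(1 - u),   dv/dt = u*v*v - (F + k)*v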

    #pragma omp parallel num_threads(nthreads_)
    {
    double uind, vind;
    #pragma omp for collapse(2)
    for (int j=0; j<Ny_loc; ++j) {
        for (int i=0; i<Nx_loc; ++i) {
            // U(i,j) and V(i,j) address u_[(i+1)*Ny_loc + j] and
            // v_[(i+1)*Ny_loc + j] (the +1 skips the top ghost row)
            uind = U(i,j);
            vind = V(i,j);
            U(i,j) += dt_ * ( -uind*vind*vind + F_*(1.-uind) );
            V(i,j) += dt_ * ( uind*vind*vind - (F_+k_)*vind );
        }
    }
    } // omp parallel
    
    MPI_Barrier(MPI_COMM_WORLD);
}