Ejemplo n.º 1
0
int compute(){
    //Assuming that the processes form a square
    int n_procs_row = sqrt(number_of_processes);
    int n_procs_col = n_procs_row;
    if (n_procs_col * n_procs_row != number_of_processes) {
        std::cerr << "number of proccessors must be a perfect square!" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    int n_dims = 2;
    int dims[n_dims] = {n_procs_row, n_procs_col};
    int periods[n_dims] = {0, 0};
    int repeat = 0;

    //create comm_groups
    MPI_Comm comm_cart;
    MPI_Cart_create(MPI_COMM_WORLD, n_dims, dims, periods, repeat, &comm_cart);

    int m_block = m / n_procs_row;
    int n_block = n / n_procs_col;
    int k_block = k / n_procs_col;

    if (m_block * n_procs_row != m) {
        std::cerr << "m must be dividable by n_procs_row" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    if (n_block * n_procs_col != n) {
        std::cerr << "n must be dividable by n_procs_col" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    if (k_block * n_procs_col != k) {
        std::cerr << "k must be dividable by n_procs_col" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    double * A_local = (double *) calloc(m_block * n_block, sizeof(double));
    double * B_local = (double *) calloc(n_block * k_block, sizeof(double));
    double * C_local = (double *) calloc(m_block * k_block, sizeof(double));

    double * A = (double *) calloc(m * n, sizeof(double));
    double * B = (double *) calloc(n * k, sizeof(double));

    initMatrices(A_local, B_local, C_local, m_block, n_block, comm_cart);


    /**
    testing the scatter function
    */
    if(rank == 0){
        double * A_1 = (double *) calloc(n * k, sizeof(double));
        double * B_1 = (double *) calloc(n * k, sizeof(double));
        double * C_1 = (double *) calloc(n * k, sizeof(double));

        initMatrices(A_1, B_1, C_1, n, n, comm_cart);

        distributeSquareMatrix(A_1, n, C_1);
    }

    gatherMatrix(m_block, n_block, A_local, m, n, A);
    gatherMatrix(n_block, k_block, B_local, n, k, B);
    /*if (rank == 3) {
    std::cout << "A" << std::endl;
      printMatrix(m_block,m_block, A_local);
      std::cout << "B" << std::endl;
      printMatrix(m_block,m_block, B_local);
      std::cout << "C" << std::endl;
      printMatrix(m_block,m_block, C_local);
    }*/
    double start_time, end_time;

    start_time = MPI_Wtime();

    summa(comm_cart, m_block, n_block, k_block, A_local, B_local, C_local);

    end_time = MPI_Wtime();

    getTimes(start_time, end_time);

    double * C = (double *) calloc(m * n, sizeof(double));
    double * C_naive = (double *) calloc(m * n, sizeof(double));

    gatherMatrix(m_block, k_block, C_local, m, k, C);

    if (rank == 0) {

        multMatricesLineByLine(m, n, k, A, B, C_naive);

        double eps = validate(n, k, C, C_naive);
        if (eps > 1e-4) {
            std::cerr <<  "ERROR: Invalid matrix -> eps = " << eps << std::endl;
            MPI_Abort(MPI_COMM_WORLD, 1);
        } else {
            std::cout << "Valid matrix" << std::endl;
        }
    }
}
Ejemplo n.º 2
0
void runPoisson(int rank, int size, int n){
  double time=MPI_Wtime();
  Real **b, *diag, *RecvBuf,*z, h, maxError;
  int i, j, m, nn, *len, *disp;

  m  = n-1;
  nn = 4*n;
  splitVector(m, size, &len, &disp);
  diag = createRealArray (m);
  b    = createReal2DArray (len[rank],m);
  z    = createRealArray (nn);
  h    = 1./(Real)n;

  #pragma omp parallel for schedule(static)
  for (i=0; i < m; i++) {
    diag[i] = 2.*(1.-cos((i+1)*M_PI/(Real)n));
  }

  #pragma omp for
  for (j=0; j < len[rank]; j++) {
  #pragma omp parallel for schedule(static)
    for (i=0; i < m; i++) {
      Real x=(Real)(j+1+disp[rank])/n;
      Real y=(Real) (i+1)/n;
      b[j][i] = h*h * funcf(x,y);
    }
  }

  #pragma omp parallel for schedule(static)
  for (j=0; j < len[rank]; j++) {
    Real* zt = createRealArray (nn);
    fst_(b[j], &n, zt, &nn);
    free(zt);
  }

  transpose(b, size, len, disp, rank, m);

  #pragma omp parallel for schedule(static)
  for (i=0; i < len[rank]; i++) {
    Real* zt  = createRealArray (nn);
    fstinv_(b[i], &n, zt, &nn);
    free(zt);
  }

  #pragma omp for
  for (j=0; j < len[rank]; j++) {
  #pragma omp parallel for schedule(static)
    for (i=0; i < m; i++) {
      b[j][i] = b[j][i]/(diag[i]+diag[j+disp[rank]]);
    }
  }

  #pragma omp parallel for schedule(static)
  for (i=0; i < len[rank]; i++) {
    Real* zt  = createRealArray (nn);
    fst_(b[i], &n, zt, &nn);
    free(zt);
  }

  transpose(b, size, len, disp, rank, m);

  #pragma omp parallel for schedule(static)
  for (j=0; j < len[rank]; j++) {
    Real* zt  = createRealArray (nn);
    fstinv_(b[j], &n, zt, &nn);
    free(zt);
  }




  if (rank==0)
  {
    RecvBuf = createRealArray (m*m);
  }
  gatherMatrix(b, m, RecvBuf, len, disp,0);

  if (rank==0)
  {
    for (int j=0; j < m; j++) {
      for (int i=0; i < m; i++) {
        printf("%e %e %e \n",(Real)i/m,(Real)j/m,RecvBuf[j*m+i] );
      }
    }
  }
}