int compute(){ //Assuming that the processes form a square int n_procs_row = sqrt(number_of_processes); int n_procs_col = n_procs_row; if (n_procs_col * n_procs_row != number_of_processes) { std::cerr << "number of proccessors must be a perfect square!" << std::endl; MPI_Abort(MPI_COMM_WORLD, 1); } int n_dims = 2; int dims[n_dims] = {n_procs_row, n_procs_col}; int periods[n_dims] = {0, 0}; int repeat = 0; //create comm_groups MPI_Comm comm_cart; MPI_Cart_create(MPI_COMM_WORLD, n_dims, dims, periods, repeat, &comm_cart); int m_block = m / n_procs_row; int n_block = n / n_procs_col; int k_block = k / n_procs_col; if (m_block * n_procs_row != m) { std::cerr << "m must be dividable by n_procs_row" << std::endl; MPI_Abort(MPI_COMM_WORLD, 1); } if (n_block * n_procs_col != n) { std::cerr << "n must be dividable by n_procs_col" << std::endl; MPI_Abort(MPI_COMM_WORLD, 1); } if (k_block * n_procs_col != k) { std::cerr << "k must be dividable by n_procs_col" << std::endl; MPI_Abort(MPI_COMM_WORLD, 1); } double * A_local = (double *) calloc(m_block * n_block, sizeof(double)); double * B_local = (double *) calloc(n_block * k_block, sizeof(double)); double * C_local = (double *) calloc(m_block * k_block, sizeof(double)); double * A = (double *) calloc(m * n, sizeof(double)); double * B = (double *) calloc(n * k, sizeof(double)); initMatrices(A_local, B_local, C_local, m_block, n_block, comm_cart); /** testing the scatter function */ if(rank == 0){ double * A_1 = (double *) calloc(n * k, sizeof(double)); double * B_1 = (double *) calloc(n * k, sizeof(double)); double * C_1 = (double *) calloc(n * k, sizeof(double)); initMatrices(A_1, B_1, C_1, n, n, comm_cart); distributeSquareMatrix(A_1, n, C_1); } gatherMatrix(m_block, n_block, A_local, m, n, A); gatherMatrix(n_block, k_block, B_local, n, k, B); /*if (rank == 3) { std::cout << "A" << std::endl; printMatrix(m_block,m_block, A_local); std::cout << "B" << std::endl; printMatrix(m_block,m_block, B_local); std::cout << "C" << std::endl; printMatrix(m_block,m_block, C_local); }*/ double start_time, end_time; start_time = MPI_Wtime(); summa(comm_cart, m_block, n_block, k_block, A_local, B_local, C_local); end_time = MPI_Wtime(); getTimes(start_time, end_time); double * C = (double *) calloc(m * n, sizeof(double)); double * C_naive = (double *) calloc(m * n, sizeof(double)); gatherMatrix(m_block, k_block, C_local, m, k, C); if (rank == 0) { multMatricesLineByLine(m, n, k, A, B, C_naive); double eps = validate(n, k, C, C_naive); if (eps > 1e-4) { std::cerr << "ERROR: Invalid matrix -> eps = " << eps << std::endl; MPI_Abort(MPI_COMM_WORLD, 1); } else { std::cout << "Valid matrix" << std::endl; } } }
void runPoisson(int rank, int size, int n){ double time=MPI_Wtime(); Real **b, *diag, *RecvBuf,*z, h, maxError; int i, j, m, nn, *len, *disp; m = n-1; nn = 4*n; splitVector(m, size, &len, &disp); diag = createRealArray (m); b = createReal2DArray (len[rank],m); z = createRealArray (nn); h = 1./(Real)n; #pragma omp parallel for schedule(static) for (i=0; i < m; i++) { diag[i] = 2.*(1.-cos((i+1)*M_PI/(Real)n)); } #pragma omp for for (j=0; j < len[rank]; j++) { #pragma omp parallel for schedule(static) for (i=0; i < m; i++) { Real x=(Real)(j+1+disp[rank])/n; Real y=(Real) (i+1)/n; b[j][i] = h*h * funcf(x,y); } } #pragma omp parallel for schedule(static) for (j=0; j < len[rank]; j++) { Real* zt = createRealArray (nn); fst_(b[j], &n, zt, &nn); free(zt); } transpose(b, size, len, disp, rank, m); #pragma omp parallel for schedule(static) for (i=0; i < len[rank]; i++) { Real* zt = createRealArray (nn); fstinv_(b[i], &n, zt, &nn); free(zt); } #pragma omp for for (j=0; j < len[rank]; j++) { #pragma omp parallel for schedule(static) for (i=0; i < m; i++) { b[j][i] = b[j][i]/(diag[i]+diag[j+disp[rank]]); } } #pragma omp parallel for schedule(static) for (i=0; i < len[rank]; i++) { Real* zt = createRealArray (nn); fst_(b[i], &n, zt, &nn); free(zt); } transpose(b, size, len, disp, rank, m); #pragma omp parallel for schedule(static) for (j=0; j < len[rank]; j++) { Real* zt = createRealArray (nn); fstinv_(b[j], &n, zt, &nn); free(zt); } if (rank==0) { RecvBuf = createRealArray (m*m); } gatherMatrix(b, m, RecvBuf, len, disp,0); if (rank==0) { for (int j=0; j < m; j++) { for (int i=0; i < m; i++) { printf("%e %e %e \n",(Real)i/m,(Real)j/m,RecvBuf[j*m+i] ); } } } }