int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int nprocs, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int numthreads = omp_get_max_threads();

    if (argc < 2) {
        printf("Usage:\n");
        printf(" poisson n\n\n");
        printf("Arguments:\n");
        printf(" n: the problem size (must be a power of 2)\n");
        MPI_Finalize();  // exit here: reading argv[1] below would be invalid
        return 1;
    }

    double time_start;
    if (rank == 0) {
        time_start = MPI_Wtime();
    }

    // The number of grid points in each direction is n+1.
    // The number of degrees of freedom in each direction is n-1 = m.
    int n = atoi(argv[1]);
    int m = n - 1;
    int nn = 4 * n;
    real h = 1.0 / n;

    // Split the matrix into column strips: each process owns an m x exact
    // strip, except the last, which owns m x rem.
    int exact = n / nprocs;
    int rem = m - (nprocs - 1) * exact;

    // Each strip is viewed as nprocs blocks stacked vertically; block_uk and
    // rem_uk are the element counts of a regular and a remainder block
    // exchanged during the transpose.
    int block_col = exact;
    int block_uk = exact * exact;
    int rem_uk = exact * rem;

    // The last strip has rem columns instead:
    if (rank == nprocs - 1) {
        block_col = rem;
        block_uk = rem * exact;
        rem_uk = rem * rem;
    }

    // Grid points
    real *grid = mk_1D_array(n + 1, false);
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < n + 1; i++) {
        grid[i] = i * h;
    }

    // The diagonal of the eigenvalue matrix of T
    real *diag = mk_1D_array(m, false);
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < m; i++) {
        diag[i] = 2.0 * (1.0 - cos((i + 1) * PI / n));
    }

    // Initialize the right-hand side data.
    // B is the column strip that this process owns.
    real **B = mk_2D_array(block_col, m, false);
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < block_col; i++) {
        for (size_t j = 0; j < m; j++) {
            B[i][j] = h * h * rhs(grid[i + 1 + (rank * exact)], grid[j + 1]);
        }
    }

    // Scratch buffers for the sine transform, one row per OpenMP thread so
    // that concurrent transforms do not overwrite each other's workspace.
    real **z = mk_2D_array(numthreads, nn, false);

    // Calculate Btilde^T = S^-1 * (S * B)^T
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < block_col; i++) {
        fst_(B[i], &n, z[omp_get_thread_num()], &nn);
    }
    transpose(B, block_col, m, nprocs, block_uk, rem_uk, rank);
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < block_col; i++) {
        fstinv_(B[i], &n, z[omp_get_thread_num()], &nn);
    }

    // Solve Lambda * Xtilde = Btilde
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < block_col; i++) {
        for (size_t j = 0; j < m; j++) {
            B[i][j] = B[i][j] / (diag[i + (rank * exact)] + diag[j]);
        }
    }

    // Calculate X = S^-1 * (S * Xtilde^T)^T
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < block_col; i++) {
        fst_(B[i], &n, z[omp_get_thread_num()], &nn);
    }
    transpose(B, block_col, m, nprocs, block_uk, rem_uk, rank);
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < block_col; i++) {
        fstinv_(B[i], &n, z[omp_get_thread_num()], &nn);
    }

    // Calculate the maximal value of the solution and the maximal pointwise
    // error against the known exact solution sin(pi*x) * sin(2*pi*y).
    double U_max = 0.0, e_max = 0.0, global_max, global_emax, error;
    for (size_t i = 0; i < block_col; i++) {
        for (size_t j = 0; j < m; j++) {
            error = fabs(B[i][j] - sin(PI * (i + 1 + (rank * exact)) * h)
                                 * sin(2 * PI * (j + 1) * h));
            U_max = U_max > B[i][j] ? U_max : B[i][j];
            e_max = e_max > error ? e_max : error;
        }
    }

    // MPI_MAX reductions to find the true global maxima on rank 0:
    MPI_Reduce(&U_max, &global_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    MPI_Reduce(&e_max, &global_emax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

    // Print the global maxima on process 0:
    if (rank == 0) {
        printf("Problem Size = %d\tNumprocs = %d\tNumthreads = %d\n", n, nprocs, numthreads);
        printf("U_max = %0.16f\n", global_max);
        printf("E_max = %0.16f\n", global_emax);
        double duration = MPI_Wtime() - time_start;
        printf("Execution Time: %0.16f \n", duration);
    }

    MPI_Finalize();
    return 0;
}
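
/*
 * The transpose() helper called above is not part of this listing. What
 * follows is a minimal sketch of one way to implement it with
 * MPI_Alltoallv, assuming `real` is double (consistent with the MPI_DOUBLE
 * reductions above) and reusing the mk_1D_array allocator from the program.
 * The packing order is an assumption, not the author's original code.
 */
void transpose(real **B, int block_col, int m, int nprocs,
               int block_uk, int rem_uk, int rank)
{
    int exact = block_uk / block_col;  // column count of a regular strip
    int rem   = rem_uk / block_col;    // column count of the last strip

    real *sendbuf = mk_1D_array(block_col * m, false);
    real *recvbuf = mk_1D_array(block_col * m, false);
    int *counts = (int *) malloc(nprocs * sizeof(int));
    int *displs = (int *) malloc(nprocs * sizeof(int));

    // Element counts and offsets for the block exchanged with each rank p.
    // The same arrays serve for sending and receiving, because the block
    // this rank sends to p has the same size as the one it receives from p.
    for (int p = 0; p < nprocs; p++) {
        int cols_p = (p == nprocs - 1) ? rem : exact;
        counts[p] = block_col * cols_p;
        displs[p] = (p == 0) ? 0 : displs[p - 1] + counts[p - 1];
    }

    // Pack: for each destination p, the rows of B that p owns, ordered so
    // the receiver can read them back as rows of its transposed strip.
    int k = 0;
    for (int p = 0; p < nprocs; p++) {
        int cols_p = (p == nprocs - 1) ? rem : exact;
        for (int j = 0; j < cols_p; j++)
            for (int i = 0; i < block_col; i++)
                sendbuf[k++] = B[i][p * exact + j];
    }

    MPI_Alltoallv(sendbuf, counts, displs, MPI_DOUBLE,
                  recvbuf, counts, displs, MPI_DOUBLE, MPI_COMM_WORLD);

    // Unpack: the block received from rank p becomes columns
    // p*exact .. p*exact + cols_p - 1 of the transposed strip.
    k = 0;
    for (int p = 0; p < nprocs; p++) {
        int cols_p = (p == nprocs - 1) ? rem : exact;
        for (int i = 0; i < block_col; i++)
            for (int j = 0; j < cols_p; j++)
                B[i][p * exact + j] = recvbuf[k++];
    }

    free(sendbuf); free(recvbuf); free(counts); free(displs);
}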
int main(int argc, char **argv)
{
    int size, rank;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    double start = MPI_Wtime();  // Start the clock
    double umaxglob = 0;         // Maximum error over all processes

    if (argc < 2) {
        printf("Usage:\n");
        printf(" poisson n\n\n");
        printf("Arguments:\n");
        printf(" n: the problem size (must be a power of 2)\n");
        MPI_Finalize();  // exit here: reading argv[1] below would be invalid
        return 1;
    }

    // The number of grid points in each direction is n+1.
    // The number of degrees of freedom in each direction is n-1.
    int n = atoi(argv[1]);
    int m = n - 1;  // number of points in each direction of B

    int *cnt = (int *) malloc(size * sizeof(int));           // number of columns owned by each process
    int *displs = (int *) malloc((size + 1) * sizeof(int));  // each process's column offset into the send buffer
    displs[size] = m;  // sentinel, so displs[p+1] - displs[p] is always valid
    displs[0] = 0;     // the first process's displacement is always 0

    int overflow = m % size;  // columns left over after even division
    for (int i = 0; i < size; i++) {
        cnt[i] = m / size;  // base column count per process
        if (overflow != 0) {
            cnt[i]++;       // distribute the extra columns
            overflow--;
        }
        if (i < size - 1) {
            displs[i + 1] = displs[i] + cnt[i];
        }
    }
    int nrColon = cnt[rank];     // number of columns this process owns
    int pros_dof = nrColon * m;  // number of elements this process owns

    int nn = 4 * n;
    double h = 1.0 / n;

    // Grid points and work arrays
    double *grid = mk_1D_array(n + 1, false);
    double **b = mk_2D_array(nrColon, m, false);
    double **bt = mk_2D_array(nrColon, m, false);
    int trad = omp_get_max_threads();  // number of OpenMP threads
    // z is 2D because of the OpenMP parallelization: each thread gets its own
    // FST workspace, so the transforms do not overwrite each other's z.
    double **z = mk_2D_array(trad, nn, false);
    double *diag = mk_1D_array(m, false);
    double *sendbuf = mk_1D_array(nrColon * m, false);
    double *recbuf = mk_1D_array(nrColon * m, false);

    int *sendcnt = (int *) malloc((size + 1) * sizeof(int));  // number of elements to send to each process
    int *sdispls = (int *) malloc((size + 1) * sizeof(int));  // offset into sendbuf for each process
    sdispls[0] = 0;  // process 0 always starts at index 0
    for (int i = 0; i < size; i++) {
        sendcnt[i] = cnt[i] * cnt[rank];    // my column count times its column count
        sdispls[i] = displs[i] * cnt[rank]; // displacement for each process
    }

    // Grid
    #pragma omp parallel for schedule(static)
    for (int i = 0; i < n + 1; i++) {
        grid[i] = i * h;
    }

    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < m; i++) {
        diag[i] = 2.0 * (1.0 - cos((i + 1) * PI / n));  // eigenvalues of T
    }

    // Initialize the right-hand side data
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < nrColon; i++) {
        for (size_t j = 0; j < m; j++) {
            // b[i][j] = h * h;
            b[i][j] = h * h * func1(grid[i + displs[rank]], grid[j]);  // evaluate the RHS, scaled by h*h
        }
    }

    // Calculate Btilde^T = S^-1 * (S * B)^T
    #pragma omp parallel for schedule(guided, 5)
    for (size_t i = 0; i < nrColon; i++) {
        fst_(b[i], &n, z[omp_get_thread_num()], &nn);
    }
    MPItranspose(b, bt, nrColon, m, sendbuf, recbuf, sendcnt, sdispls, size, rank, displs);
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < nrColon; i++) {
        fstinv_(bt[i], &n, z[omp_get_thread_num()], &nn);
    }

    // Solve Lambda * Xtilde = Btilde
    #pragma omp parallel for schedule(static)
    for (int j = 0; j < nrColon; j++) {
        for (int i = 0; i < m; i++) {
            bt[j][i] /= (diag[j + displs[rank]] + diag[i]);
        }
    }

    // Calculate X = S^-1 * (S * Xtilde^T)
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < nrColon; i++) {
        fst_(bt[i], &n, z[omp_get_thread_num()], &nn);
    }
    MPItranspose(bt, b, nrColon, m, sendbuf, recbuf, sendcnt, sdispls, size, rank, displs);
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < nrColon; i++) {
        fstinv_(b[i], &n, z[omp_get_thread_num()], &nn);
    }

    // Calculate the maximal pointwise error: compare the result against the
    // known solution; the difference should be ~0. A max reduction combines
    // the per-thread maxima (the original shared u_max was a data race).
    double u_max = 0.0;
    #pragma omp parallel for schedule(static) reduction(max:u_max)
    for (size_t i = 0; i < nrColon; i++) {
        for (size_t j = 0; j < m; j++) {
            double temp = fabs(b[i][j] - func2(grid[displs[rank] + i], grid[j]));
            if (temp > u_max) {
                u_max = temp;
            }
        }
    }
    // Find the largest u_max over all processes and store it in umaxglob:
    MPI_Reduce(&u_max, &umaxglob, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

    if (rank == 0) {
        printf("Nodes = %d \n", size);
        printf("Threads per node = %d \n", omp_get_max_threads());
        printf("u_max = %e\n", umaxglob);           // print the maximum error
        double times = MPI_Wtime() - start;         // stop the clock
        printf("Time elapsed = %1.16f \n", times);  // print elapsed time
    }
    // Finalize last: MPI_Wtime may not be called after MPI_Finalize, so the
    // original order (finalize before timing) was erroneous.
    MPI_Finalize();
    return 0;
}
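
/*
 * Likewise, the MPItranspose() helper used by the second program is not
 * shown in this listing. Below is a sketch of a matching implementation
 * built on MPI_Alltoallv; it relies on the sentinel displs[size] == m set
 * in main, so rank p owns columns displs[p] .. displs[p+1]-1. The packing
 * order is an assumption, not the author's original code.
 */
void MPItranspose(double **b, double **bt, int nrColon, int m,
                  double *sendbuf, double *recbuf,
                  int *sendcnt, int *sdispls,
                  int size, int rank, int *displs)
{
    (void) rank;  // kept to match the call site; unused in this sketch

    // Pack: for each destination p, the rows of b that p owns, ordered so
    // that p can read them back as rows of its transposed strip.
    int k = 0;
    for (int p = 0; p < size; p++) {
        for (int j = displs[p]; j < displs[p + 1]; j++)
            for (int i = 0; i < nrColon; i++)
                sendbuf[k++] = b[i][j];
    }

    // The same counts and displacements serve for sending and receiving,
    // since sendcnt[p] = cnt[p] * cnt[rank] is symmetric in the pair.
    MPI_Alltoallv(sendbuf, sendcnt, sdispls, MPI_DOUBLE,
                  recbuf, sendcnt, sdispls, MPI_DOUBLE, MPI_COMM_WORLD);

    // Unpack the block received from rank p into columns
    // displs[p] .. displs[p+1]-1 of the transposed strip.
    k = 0;
    for (int p = 0; p < size; p++) {
        for (int i = 0; i < nrColon; i++)
            for (int j = displs[p]; j < displs[p + 1]; j++)
                bt[i][j] = recbuf[k++];
    }
}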