Example #1
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <math.h>
#include <mpi.h>
#include <omp.h>

// NOTE: real, PI, mk_1D_array, mk_2D_array, rhs, fst_, fstinv_ and transpose
// are provided elsewhere in the project (not shown in this listing).

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int nprocs, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int numthreads = omp_get_max_threads();

    if (argc < 2) {
        printf("Usage:\n");
        printf("  poisson n\n\n");
        printf("Arguments:\n");
        printf("  n: the problem size (must be a power of 2)\n");
        MPI_Finalize();
        return 1;
    }

    double time_start;
    if (rank == 0) {
        time_start = MPI_Wtime();
    }

    // The number of grid points in each direction is n+1
    // The number of degrees of freedom in each direction is n-1 = m
    int n = atoi(argv[1]);
    int m = n - 1;
    int nn = 4 * n;
    real h = 1.0 / n;

    // Split the matrix into column strips across the processes:
    int exact = n/nprocs;
    int rem = m - (nprocs - 1)*exact;
    // Each process owns a strip of size m x exact (m x rem for the last process).
    // Each strip is viewed as 'nprocs' blocks stacked vertically; block_uk and
    // rem_uk are the number of elements exchanged with a regular process and
    // with the last process, respectively (used by transpose below).
    int block_col = exact;
    int block_uk = exact*exact;
    int rem_uk = exact*rem;
    // The last strip has rem columns instead of exact:
    if (rank == nprocs-1){
        block_col = rem;
        block_uk = rem*exact;
        rem_uk = rem*rem;
    }

    // Grid points
    real *grid = mk_1D_array(n+1, false);
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < n+1; i++) {
        grid[i] = i * h;
    }

    // The diagonal of the eigenvalue matrix of T
    real *diag = mk_1D_array(m, false);
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < m; i++) {
        diag[i] = 2.0 * (1.0 - cos((i+1) * PI / n));
    }

    // Initialize the right-hand side data.
    // B is the column strip that this process owns.
    real **B = mk_2D_array(block_col, m, false);
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < block_col; i++) {
        for (size_t j = 0; j < m; j++) {
            B[i][j] = h * h * rhs(grid[i+1+(rank*exact)], grid[j+1]);
        }
    }

    // Work buffers for the Sine Transform, one per OpenMP thread so that the
    // parallel fst_/fstinv_ calls do not overwrite each other's data:
    real **z = mk_2D_array(numthreads, nn, false);

    // Calculate Btilde^T = S^-1 * (S * B)^T
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < block_col; i++) {
        fst_(B[i], &n, z[omp_get_thread_num()], &nn);
    }
    transpose(B, block_col, m, nprocs, block_uk, rem_uk, rank);
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < block_col; i++) {
        fstinv_(B[i], &n, z[omp_get_thread_num()], &nn);
    }

    // Solve Lambda * Xtilde = Btilde
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < block_col; i++) {
        for (size_t j = 0; j < m; j++) {
            B[i][j] = B[i][j] / (diag[i+(rank*exact)] + diag[j]);
        }
    }

    // Calculate X = S^-1 * (S * Xtilde^T) ^ T
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < block_col; i++) {
        fst_(B[i], &n, z[omp_get_thread_num()], &nn);
    }
    transpose(B, block_col, m, nprocs, block_uk, rem_uk, rank);
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < block_col; i++) {
        fstinv_(B[i], &n, z[omp_get_thread_num()], &nn);
    }

    // Calculate the maximal value of the solution and the maximal pointwise
    // error against the reference solution sin(pi*x)*sin(2*pi*y)
    double U_max = 0.0, e_max = 0.0, global_max, global_emax, error;
    for (size_t i = 0; i < block_col; i++){
        for (size_t j = 0; j < m; j++){
            error = fabs(B[i][j] - sin(PI*(i+1+(rank*exact))*h)*sin(2*PI*(j+1)*h));
            U_max = U_max > B[i][j] ? U_max : B[i][j];
            e_max = e_max > error ? e_max : error;
        }
    }

    // Reduce with MPI_MAX to find the global maxima on rank 0:
    MPI_Reduce(&U_max, &global_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    MPI_Reduce(&e_max, &global_emax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

    // Print the Global Maximum on process 0:
    if (rank == 0){
        printf("Problem Size = %d\tNumprocs = %d\tNumthreads = %d\n", n, nprocs, numthreads);
        printf("U_max = %0.16f\n", global_max);
        printf("E_max = %0.16f\n", global_emax);
        double duration = MPI_Wtime() - time_start;
        printf("Execution Time: %0.16f\n", duration);
    }
    }

    MPI_Finalize();
    return 0;
}
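
The transpose(B, block_col, m, nprocs, block_uk, rem_uk, rank) routine called twice above is not part of the listing. Below is a minimal sketch of how such a routine can be written with MPI_Alltoallv; it is an illustration, not the project's actual implementation. It assumes real is double, that block_uk and rem_uk are the element counts exchanged with a regular process and with the last process, and that the same project headers as the listing are available.

// Hypothetical sketch of the parallel transpose (not the project's code).
// Each rank owns a block_col-by-m strip of the global m-by-m matrix; the strip
// is exchanged block-wise with MPI_Alltoallv and written back transposed.
static void transpose(real **B, int block_col, int m, int nprocs,
                      int block_uk, int rem_uk, int rank)
{
    // Recover the column counts of the other ranks from the element counts:
    // on every rank, block_uk = block_col*exact and rem_uk = block_col*rem.
    int exact = block_uk / block_col;   // columns owned by ranks 0..nprocs-2
    int rem   = rem_uk / block_col;     // columns owned by the last rank

    real *sendbuf = malloc((size_t)block_col * m * sizeof(real));
    real *recvbuf = malloc((size_t)block_col * m * sizeof(real));
    int *counts = malloc(nprocs * sizeof(int));
    int *displs = malloc(nprocs * sizeof(int));

    int pos = 0;
    for (int q = 0; q < nprocs; q++) {
        int cols_q = (q == nprocs - 1) ? rem : exact;  // columns owned by rank q
        counts[q] = block_col * cols_q;                // same amount sent and received
        displs[q] = pos;
        // Pack the sub-block of our strip that belongs to rank q, row by row.
        for (int i = 0; i < block_col; i++)
            for (int j = 0; j < cols_q; j++)
                sendbuf[pos++] = B[i][q*exact + j];
    }

    // The exchange is symmetric, so the same counts/displacements serve both sides.
    MPI_Alltoallv(sendbuf, counts, displs, MPI_DOUBLE,
                  recvbuf, counts, displs, MPI_DOUBLE, MPI_COMM_WORLD);

    // Unpack: the block received from rank q holds q's strip restricted to our
    // index range; writing it back transposed yields our strip of B^T.
    for (int q = 0; q < nprocs; q++) {
        int cols_q = (q == nprocs - 1) ? rem : exact;
        for (int i = 0; i < cols_q; i++)
            for (int j = 0; j < block_col; j++)
                B[j][q*exact + i] = recvbuf[displs[q] + i*block_col + j];
    }

    free(sendbuf); free(recvbuf); free(counts); free(displs);
}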
Example #2
File: poisson.c  Project: lapstue/tma4280
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <math.h>
#include <mpi.h>
#include <omp.h>

// NOTE: PI, mk_1D_array, mk_2D_array, func1, func2, fst_, fstinv_ and
// MPItranspose are provided elsewhere in the project (not shown in this listing).

int main(int argc, char **argv)
{
    int size, rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    double start = MPI_Wtime(); // start the timer
    double umaxglob = 0; // global max error over all processes

    if (argc < 2) {
        printf("Usage:\n");
        printf("  poisson n\n\n");
        printf("Arguments:\n");
        printf("  n: the problem size (must be a power of 2)\n");
        MPI_Finalize();
        return 1;
    }

    // The number of grid points in each direction is n+1
    // The number of degrees of freedom in each direction is n-1
    int n = atoi(argv[1]);
    
    int m = n - 1;  // number of unknowns in each direction of B

    int *cnt = (int *) malloc(size * sizeof(int)); // local number of columns on each process
    int *displs = (int *) malloc((size+1) * sizeof(int)); // column displacement of each process's strip
    displs[size] = m;
    displs[0] = 0; // the first process always starts at displacement 0


    int overflow = m % size; // number of leftover columns after even division

    for (int i = 0; i < size; i++){
        cnt[i] = m / size; // number of columns for each process
        if (overflow != 0){
            cnt[i]++; // distribute the leftover columns
            overflow--;
        }
        if (i < size-1){
            displs[i+1] = displs[i] + cnt[i];
        }
    }

    int nrColon = cnt[rank]; // number of columns this process owns
    int pros_dof = nrColon*m; // number of elements this process owns


    int nn = 4 * n;
    double h = 1.0 / n;


    // Grid points
    double *grid = mk_1D_array(n+1, false);
    double **b = mk_2D_array(nrColon, m, false);
    double **bt = mk_2D_array(nrColon, m,false);

    int trad = omp_get_max_threads(); // number of OpenMP threads
    double **z = mk_2D_array(trad, nn, false); // z is 2D because of the OpenMP parallelization, so one thread's FST work array does not overwrite another's

    double *diag = mk_1D_array(m, false);     
    double *sendbuf = mk_1D_array(nrColon*m, false);
    double *recbuf = mk_1D_array(nrColon*m, false); 


    int *sendcnt = (int *) malloc((size+1) * sizeof(int)); // number of elements to send to each process
    int *sdispls = (int *) malloc((size+1) * sizeof(int)); // start index in sendbuf for each process

 

    sdispls[0] = 0; // process 0 always starts at index 0
    for (int i = 0; i < size; i++){
        sendcnt[i] = cnt[i]*cnt[rank]; // number of columns process i owns * number of columns I own
        sdispls[i] = displs[i]*cnt[rank]; // displacement in sendbuf for process i
    }

    // GRID
    #pragma omp parallel for schedule(static)
    for (int i = 0; i < n+1; i++) {
        grid[i] = i * h;
    }




    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < m; i++) {
        diag[i] = 2.0 * (1.0 - cos((i+1) * PI / n)); // eigenvalues of T
    }

    // Initialize the right-hand side data
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < nrColon; i++) {
        for (size_t j = 0; j < m; j++) {
            // b[i][j] = h * h;  // (constant right-hand side, disabled)
            b[i][j] = h * h * func1(grid[i+displs[rank]], grid[j]); // evaluate the right-hand side function times h*h
        }
    }

    // Calculate Btilde^T = S^-1 * (S * B)^T 
 
    #pragma omp parallel for schedule(guided, 5)
    for (size_t i = 0; i < nrColon; i++) {
        fst_(b[i], &n, z[omp_get_thread_num()], &nn);
    }
    MPItranspose(b, bt, nrColon, m, sendbuf, recbuf, sendcnt, sdispls, size, rank, displs);

    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < nrColon; i++) {
        fstinv_(bt[i], &n, z[omp_get_thread_num()], &nn);
    }

    // Solve Lambda * Xtilde = Btilde

    #pragma omp parallel for schedule(static)
    for (int j = 0; j < nrColon; j++) {
        for (int i = 0; i < m; i++) {
            bt[j][i] /= (diag[j+displs[rank]] + diag[i]);
        }
    }

    // Calculate X = S^-1 * (S * Xtilde^T)^T
    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < nrColon; i++) {
        fst_(bt[i], &n, z[omp_get_thread_num()], &nn);
    }
    MPItranspose(bt, b, nrColon, m, sendbuf, recbuf, sendcnt, sdispls, size, rank, displs);

    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < nrColon; i++) {
        fstinv_(b[i], &n, z[omp_get_thread_num()], &nn);
    }

    // Calculate the maximal pointwise error of the solution
    double u_max = 0.0, temp;

    #pragma omp parallel for schedule(static) private(temp) reduction(max:u_max)
    for (size_t i = 0; i < nrColon; i++) {
        for (size_t j = 0; j < m; j++) {
            temp = fabs(b[i][j] - func2(grid[displs[rank]+i], grid[j])); // compare against the known solution; should be ~0
            if (temp > u_max){
                u_max = temp;
            }
        }
    }
    MPI_Reduce(&u_max, &umaxglob, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); // find the largest u_max across all processes on rank 0

    if (rank == 0) {
        printf("MPI processes = %d \n", size);
        printf("Threads per process = %d \n", omp_get_max_threads());
        printf("u_max = %e\n", umaxglob); // print the max error
        double times = MPI_Wtime() - start; // stop the timer
        printf("Time elapsed = %1.16f \n", times); // print the elapsed time
    }

    MPI_Finalize();

    return 0;
}
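
MPItranspose is likewise not shown in this listing. The following is a minimal sketch of how it could be written with MPI_Alltoallv, assuming the sendcnt/sdispls arrays computed in main describe both the send and the receive side (they are symmetric for this block layout); it is an illustration, not the actual implementation from lapstue/tma4280.

// Hypothetical sketch of MPItranspose (not the project's code): pack the local
// strip by destination rank, exchange with MPI_Alltoallv, unpack transposed.
void MPItranspose(double **b, double **bt, int nrColon, int m,
                  double *sendbuf, double *recbuf, int *sendcnt, int *sdispls,
                  int size, int rank, int *displs)
{
    // Pack: for each destination q, copy the nrColon-by-cnt[q] sub-block of b
    // that lies in q's column range [displs[q], displs[q]+cnt[q]).
    for (int q = 0; q < size; q++) {
        int cols_q = sendcnt[q] / nrColon;   // number of columns rank q owns
        int pos = sdispls[q];
        for (int i = 0; i < nrColon; i++)
            for (int j = 0; j < cols_q; j++)
                sendbuf[pos++] = b[i][displs[q] + j];
    }

    // Counts and displacements are symmetric, so the same arrays are reused on
    // the receive side.
    MPI_Alltoallv(sendbuf, sendcnt, sdispls, MPI_DOUBLE,
                  recbuf,  sendcnt, sdispls, MPI_DOUBLE, MPI_COMM_WORLD);

    // Unpack transposed: the block received from rank q holds q's rows
    // restricted to our columns.
    for (int q = 0; q < size; q++) {
        int cols_q = sendcnt[q] / nrColon;
        for (int i = 0; i < cols_q; i++)
            for (int j = 0; j < nrColon; j++)
                bt[j][displs[q] + i] = recbuf[sdispls[q] + i*nrColon + j];
    }
}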