void Comm::add_to_module () { if (comm_module == NULL) initcomm(); if (comm_module != NULL) { PyObject *comm = PyComm_new(this); PyModule_AddObject(comm_module, "inst", comm); } }
int main(int argc,char *argv[]) { int i,j,k,nn; int mx,my,mz,it; float gosa; double cpu,cpu0,cpu1,flop,target; target= 60.0; omega= 0.8; mx= MX0-1; my= MY0-1; mz= MZ0-1; ndx= NDX0; ndy= NDY0; ndz= NDZ0; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &npe); MPI_Comm_rank(MPI_COMM_WORLD, &id); int namelen; char processor_name[MPI_MAX_PROCESSOR_NAME]; MPI_Get_processor_name(processor_name,&namelen); fprintf(stderr, "[%d] %s\n", id, processor_name); initcomm(ndx,ndy,ndz); it= initmax(mx,my,mz); /* * Initializing matrixes */ initmt(mx,it); float *sendp2_buf = (float*)malloc(MIMAX*MKMAX*sizeof(float)*4); sendp2_lo_sendbuf = &sendp2_buf[MIMAX*MKMAX*0]; sendp2_lo_recvbuf = &sendp2_buf[MIMAX*MKMAX*1]; sendp2_hi_sendbuf = &sendp2_buf[MIMAX*MKMAX*2]; sendp2_hi_recvbuf = &sendp2_buf[MIMAX*MKMAX*3]; #pragma acc enter data create(sendp2_buf[0:MIMAX*MKMAX*4]) if(id==0){ printf("Sequential version array size\n"); printf(" mimax = %d mjmax = %d mkmax = %d\n",MX0,MY0,MZ0); printf("Parallel version array size\n"); printf(" mimax = %d mjmax = %d mkmax = %d\n",MIMAX,MJMAX,MKMAX); printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax); printf("I-decomp = %d J-decomp = %d K-decomp =%d\n",ndx,ndy,ndz); } nn= 3; if(id==0){ printf(" Start rehearsal measurement process.\n"); printf(" Measure the performance in %d times.\n\n",nn); } #pragma acc data copyin(p, bnd, wrk1, wrk2, a, b, c) present(sendp2_buf[0:MIMAX*MKMAX*4]) { MPI_Barrier(MPI_COMM_WORLD); cpu0= gettime(); gosa= jacobi(nn); cpu1= gettime(); cpu = cpu1 - cpu0; MPI_Allreduce(MPI_IN_PLACE, &cpu, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); flop= fflop(mz,my,mx); if(id == 0){ printf(" MFLOPS: %f time(s): %f %e\n\n", mflops(nn,cpu,flop),cpu,gosa); } nn= (int)(target/(cpu/3.0)); nn= LOOP_TIMES; halo_time = 0.0; if(id == 0){ printf(" Now, start the actual measurement process.\n"); printf(" The loop will be excuted in %d times\n",nn); printf(" This will take about one minute.\n"); printf(" Wait for a while\n\n"); } /* * Start measuring */ MPI_Barrier(MPI_COMM_WORLD); cpu0= gettime(); gosa= jacobi(nn); cpu1= gettime(); cpu = cpu1 - cpu0; MPI_Allreduce(MPI_IN_PLACE, &cpu, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(&halo_time, &max_halo_time, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(&halo_time, &ave_halo_time, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); ave_halo_time /= npe; }//end of acc data if(id == 0){ printf("cpu : %f sec. halo(AVE.) %f sec. halo(MAX) %f sec.\n", cpu, ave_halo_time, max_halo_time); printf("Loop executed for %d times\n",nn); printf("Gosa : %e \n",gosa); printf("MFLOPS measured : %f\n",mflops(nn,cpu,flop)); printf("Score based on Pentium III 600MHz : %f\n", mflops(nn,cpu,flop)/82.84); } free(sendp2_buf); MPI_Finalize(); return (0); }
int main(int argc,char *argv[]) { int i,j,k,nn; int mx,my,mz,it; float gosa; double cpu,cpu0,cpu1,flop,target; target= 60.0; omega= 0.8; mx= MX0-1; my= MY0-1; mz= MZ0-1; ndx= NDX0; ndy= NDY0; ndz= NDZ0; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &npe); MPI_Comm_rank(MPI_COMM_WORLD, &id); hime_err_init(id); if (argc != 3) { if (id == 0) { printf("./bmt <Restart #> <Checkpoint interval (steps)>\n"); printf("\n"); printf(" Restart #:\n"); printf(" Checkpiont id at which bmt starts\n"); printf(" Checkpoint interval (steps):\n"); printf(" # of Steps to skip checkpointing\n"); printf(""); } MPI_Finalize(); exit(0); } restart_id = atoi(argv[1]); interval = atoi(argv[2]); hime_dbgi(0, "Checkpoint directory: %s", CHECKPOINT_DIR); hime_dbgi(0, "Checkpoint interval: %d", interval); if (restart_id > 0) { hime_dbgi(0, "Restart ID: %d", restart_id); restart(restart_id); } initcomm(ndx,ndy,ndz); it= initmax(mx,my,mz); /* * Initializing matrixes */ initmt(mx,it); if(id==0){ printf("Sequential version array size\n"); printf(" mimax = %d mjmax = %d mkmax = %d\n",MX0,MY0,MZ0); printf("Parallel version array size\n"); printf(" mimax = %d mjmax = %d mkmax = %d\n",MIMAX,MJMAX,MKMAX); printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax); printf("I-decomp = %d J-decomp = %d K-decomp =%d\n",ndx,ndy,ndz); } nn= 3; if(id==0){ printf(" Start rehearsal measurement process.\n"); printf(" Measure the performance in %d times.\n\n",nn); } MPI_Barrier(MPI_COMM_WORLD); cpu0= MPI_Wtime(); gosa= jacobi(nn); cpu1= MPI_Wtime() - cpu0; MPI_Allreduce(&cpu1, &cpu, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); flop= fflop(mz,my,mx); if(id == 0){ printf(" MFLOPS: %f time(s): %f %e\n\n", mflops(nn,cpu,flop),cpu,gosa); } nn= (int)(target/(cpu/3.0)); if(id == 0){ printf(" Now, start the actual measurement process.\n"); printf(" The loop will be excuted in %d times\n",nn); printf(" This will take about one minute.\n"); printf(" Wait for a while\n\n"); } /* * Start measuring */ MPI_Barrier(MPI_COMM_WORLD); cpu0 = MPI_Wtime(); gosa = jacobi(nn); cpu1 = MPI_Wtime() - cpu0; MPI_Allreduce(&cpu1, &cpu, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); if(id == 0){ printf("cpu : %f sec.\n", cpu); printf("Loop executed for %d times\n",nn); printf("Gosa : %e \n",gosa); printf("MFLOPS measured : %f\n",mflops(nn,cpu,flop)); printf("Score based on Pentium III 600MHz : %f\n", mflops(nn,cpu,flop)/82.84); } MPI_Finalize(); return (0); }
void X_FACTOR_ (DATA_TYPE *matrix,int *matrixsize, int *num_procsr, int *permute, double *secs) { DATA_TYPE *mat; int *permutations; double run_secs; /* time (in secs) during which the prog ran */ double seconds(); /* function to generate timings */ double tsecs; /* intermediate storage of timing info */ int totmem1; /* Determine who I am (me ) and the total number of nodes (nprocs_cube) */ MPI_Comm_size(MPI_COMM_WORLD,&nprocs_cube); MPI_Comm_rank(MPI_COMM_WORLD, &me); permutations = permute; mat = matrix; matrix_size = *matrixsize; nrows_matrix = *matrixsize; ncols_matrix = *matrixsize; nprocs_row = *num_procsr; totmem1=0; /* Initialize the total memory used */ nprocs_col = nprocs_cube/nprocs_row; max_procs = (nprocs_row < nprocs_col) ? nprocs_col : nprocs_row; /* set up communicators for rows and columns */ myrow = mesh_row(me); mycol = mesh_col(me); MPI_Comm_split(MPI_COMM_WORLD,myrow,mycol,&row_comm); MPI_Comm_split(MPI_COMM_WORLD,mycol,myrow,&col_comm); {int checkcol,checkrow; MPI_Comm_rank(col_comm, &checkrow) ; MPI_Comm_rank(row_comm, &checkcol) ; if (myrow != checkrow) { printf("Node %d: my row = %d but rank in col = %d\n",me,myrow,checkrow); if (mycol != checkcol) printf("Node %d: my col = %d but rank in row = %d\n",me,mycol,checkcol); } } /* Distribution for the matrix on me */ my_first_col = mesh_col(me); my_first_row = mesh_row(me); my_rows = nrows_matrix / nprocs_col; if (my_first_row < nrows_matrix % nprocs_col) ++my_rows; my_cols = ncols_matrix / nprocs_row; if (my_first_col < ncols_matrix % nprocs_row) ++my_cols; /* blksz paramter must be set */ blksz = DEFBLKSZ; /* allocate arrays for factor/solve */ pivot_vec = (int *) malloc(my_cols * sizeof(int)); totmem1 += my_cols * sizeof(int); if (pivot_vec == NULL) { fprintf(stderr, "Node %d: Out of memory\n", me); exit(-1); } row3 = (DATA_TYPE *) malloc((my_cols +1+ blksz + nrhs) * sizeof(DATA_TYPE)); totmem1 += (my_cols + blksz + 1) * sizeof(DATA_TYPE); if (row3 == NULL) { fprintf(stderr, "Node %d: Out of memory\n", me); exit(-1); } row2 = (DATA_TYPE *) malloc((my_cols + blksz+10 + nrhs) * sizeof(DATA_TYPE)); totmem1 += (my_cols + blksz + 1) * sizeof(DATA_TYPE); if (row2 == NULL) { fprintf(stderr, "Node %d: Out of memory\n", me); exit(-1); } row1_stride = my_cols+blksz+1; row1 = (DATA_TYPE *) malloc(blksz*(my_cols+blksz+nrhs+3)*sizeof(DATA_TYPE)); totmem1 += blksz * (my_cols + blksz + 1) * sizeof(DATA_TYPE); if (row1 == NULL) { fprintf(stderr, "Node %d: Out of memory\n", me); exit(-1); } col2 = (DATA_TYPE *) malloc((my_rows + 10) * sizeof(DATA_TYPE)); totmem1 += (my_rows + 1) * sizeof(DATA_TYPE); if (col2 == NULL) { fprintf(stderr, "Node %d: Out of memory\n", me); exit(-1); } col1_stride = my_rows; col1 = (DATA_TYPE *) malloc(blksz * (my_rows + 10) * sizeof(DATA_TYPE)); totmem1 += blksz * (my_rows + 1) * sizeof(DATA_TYPE); if (col1 == NULL) { fprintf(stderr, "Node %d: Out of memory\n", me); exit(-1); } mat_stride = my_rows; /* Factor and Solve the system */ tsecs = seconds(0.0); /* Initialize Communication */ initcomm(); factor(mat); tsecs = seconds(tsecs); run_secs = (double) tsecs; /* Solve time secs */ *secs = run_secs; free(row2); }
int main(int argc,char *argv[]) { int i,j,k,nn; int mx,my,mz,it; float gosa; double cpu,cpu0,cpu1,flop,target; target= 60.0; omega= 0.8; mx= MX0-1; my= MY0-1; mz= MZ0-1; ndx= NDX0; ndy= NDY0; ndz= NDZ0; MPI_Init(&argc, &argv); #ifdef SCR_ENABLE SCR_Init(); #endif MPI_Comm_size(MPI_COMM_WORLD, &npe); MPI_Comm_rank(MPI_COMM_WORLD, &id); initcomm(ndx,ndy,ndz); it= initmax(mx,my,mz); /* * Initializing matrixes */ initmt(mx,it); if(id==0){ printf("Sequential version array size\n"); printf(" mimax = %d mjmax = %d mkmax = %d\n",MX0,MY0,MZ0); printf("Parallel version array size\n"); printf(" mimax = %d mjmax = %d mkmax = %d\n",MIMAX,MJMAX,MKMAX); printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax); printf("I-decomp = %d J-decomp = %d K-decomp =%d\n",ndx,ndy,ndz); } nn= 3; if(id==0){ printf(" Start rehearsal measurement process.\n"); printf(" Measure the performance in %d times.\n\n",nn); } MPI_Barrier(MPI_COMM_WORLD); cpu0= MPI_Wtime(); gosa= jacobi(nn); cpu1= MPI_Wtime() - cpu0; MPI_Allreduce(&cpu1, &cpu, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); flop= fflop(mz,my,mx); if(id == 0){ printf(" MFLOPS: %f time(s): %f %e\n\n", mflops(nn,cpu,flop),cpu,gosa); } nn= (int)(target/(cpu/3.0)); if(id == 0){ printf(" Now, start the actual measurement process.\n"); printf(" The loop will be excuted in %d times\n",nn); printf(" This will take about one minute.\n"); printf(" Wait for a while\n\n"); } /* * Start measuring */ MPI_Barrier(MPI_COMM_WORLD); cpu0 = MPI_Wtime(); // nn = 10000000; gosa = jacobi(nn); cpu1 = MPI_Wtime() - cpu0; MPI_Allreduce(&cpu1, &cpu, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); if(id == 0){ fprintf(stderr, "cpu : %f sec.\n", cpu); fprintf(stderr, "Loop executed for %d times\n",nn); fprintf(stderr, "Gosa : %e \n",gosa); fprintf(stderr, "GFLOPS measured : %f\n",mflops(nn,cpu,flop)/1000.0); fprintf(stderr, "Score based on Pentium III 600MHz : %f\n", mflops(nn,cpu,flop)/82.84); } #ifdef SCR_ENABLE SCR_Finalize(); #endif MPI_Finalize(); return (0); }