예제 #1
0
파일: Comm.cpp 프로젝트: hwork/nao-man
void
Comm::add_to_module ()
{
  if (comm_module == NULL)
    initcomm();

  if (comm_module != NULL) {
    PyObject *comm = PyComm_new(this);
    PyModule_AddObject(comm_module, "inst", comm);
  }
}
예제 #2
0
int
main(int argc,char *argv[])
{
  int    i,j,k,nn;
  int    mx,my,mz,it;
  float  gosa;
  double cpu,cpu0,cpu1,flop,target;

  target= 60.0;
  omega= 0.8;
  mx= MX0-1;
  my= MY0-1;
  mz= MZ0-1;
  ndx= NDX0;
  ndy= NDY0;
  ndz= NDZ0;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &npe);
  MPI_Comm_rank(MPI_COMM_WORLD, &id);

  int    namelen;
  char   processor_name[MPI_MAX_PROCESSOR_NAME];
  MPI_Get_processor_name(processor_name,&namelen);
  fprintf(stderr, "[%d] %s\n", id, processor_name);

  initcomm(ndx,ndy,ndz);
  it= initmax(mx,my,mz);

  /*
   *    Initializing matrixes
   */
  initmt(mx,it);

  float *sendp2_buf = (float*)malloc(MIMAX*MKMAX*sizeof(float)*4);
  sendp2_lo_sendbuf = &sendp2_buf[MIMAX*MKMAX*0];
  sendp2_lo_recvbuf = &sendp2_buf[MIMAX*MKMAX*1];
  sendp2_hi_sendbuf = &sendp2_buf[MIMAX*MKMAX*2];
  sendp2_hi_recvbuf = &sendp2_buf[MIMAX*MKMAX*3];
#pragma acc enter data create(sendp2_buf[0:MIMAX*MKMAX*4])

  if(id==0){
    printf("Sequential version array size\n");
    printf(" mimax = %d mjmax = %d mkmax = %d\n",MX0,MY0,MZ0);
    printf("Parallel version array size\n");
    printf(" mimax = %d mjmax = %d mkmax = %d\n",MIMAX,MJMAX,MKMAX);
    printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax);
    printf("I-decomp = %d J-decomp = %d K-decomp =%d\n",ndx,ndy,ndz);
  }

  nn= 3;
  if(id==0){
    printf(" Start rehearsal measurement process.\n");
    printf(" Measure the performance in %d times.\n\n",nn);
  }

#pragma acc data copyin(p, bnd, wrk1, wrk2, a, b, c) present(sendp2_buf[0:MIMAX*MKMAX*4])
  {
  MPI_Barrier(MPI_COMM_WORLD);
  cpu0= gettime();
  gosa= jacobi(nn);
  cpu1= gettime();
  cpu = cpu1 - cpu0;

  MPI_Allreduce(MPI_IN_PLACE,
                &cpu,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                MPI_COMM_WORLD);

  flop= fflop(mz,my,mx);

  if(id == 0){
    printf(" MFLOPS: %f time(s): %f %e\n\n",
           mflops(nn,cpu,flop),cpu,gosa);
  }

  nn= (int)(target/(cpu/3.0));
  nn= LOOP_TIMES;
  halo_time = 0.0;
  if(id == 0){
    printf(" Now, start the actual measurement process.\n");
    printf(" The loop will be excuted in %d times\n",nn);
    printf(" This will take about one minute.\n");
    printf(" Wait for a while\n\n");
  }

  /*
   *    Start measuring
   */
  MPI_Barrier(MPI_COMM_WORLD);
  cpu0= gettime();
  gosa= jacobi(nn);
  cpu1= gettime();
  cpu = cpu1 - cpu0;

  MPI_Allreduce(MPI_IN_PLACE,
                &cpu,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                MPI_COMM_WORLD);

  MPI_Allreduce(&halo_time,
                &max_halo_time,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                MPI_COMM_WORLD);

  MPI_Allreduce(&halo_time,
                &ave_halo_time,
                1,
                MPI_DOUBLE,
                MPI_SUM,
                MPI_COMM_WORLD);
  ave_halo_time /= npe;
  }//end of acc data

  if(id == 0){
    printf("cpu : %f sec. halo(AVE.) %f sec. halo(MAX) %f sec.\n", cpu, ave_halo_time, max_halo_time);
    printf("Loop executed for %d times\n",nn);
    printf("Gosa : %e \n",gosa);
    printf("MFLOPS measured : %f\n",mflops(nn,cpu,flop));
    printf("Score based on Pentium III 600MHz : %f\n",
           mflops(nn,cpu,flop)/82.84);
  }

  free(sendp2_buf);

  MPI_Finalize();

  return (0);
}
예제 #3
0
/*
 * Benchmark driver (MPI variant with checkpoint/restart arguments).
 *
 * Usage: ./bmt <Restart #> <Checkpoint interval (steps)>
 *
 * Parses the two arguments into restart_id and interval, calls restart()
 * when restart_id is positive, then runs a 3-iteration rehearsal of
 * jacobi() followed by a measured run whose iteration count is estimated
 * from the rehearsal time. Results are printed on rank 0.
 *
 * Returns 0 on completion; exits with status 0 after printing the usage
 * text when the argument count is wrong.
 */
int
main(int argc,char *argv[])
{
  int    nn;
  int    mx,my,mz,it;
  float  gosa;
  double cpu,cpu0,cpu1,flop,target;

  target= 60.0;
  omega= 0.8;
  mx= MX0-1;
  my= MY0-1;
  mz= MZ0-1;
  ndx= NDX0;
  ndy= NDY0;
  ndz= NDZ0;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &npe);
  MPI_Comm_rank(MPI_COMM_WORLD, &id);

  hime_err_init(id);
  if (argc != 3) {
    if (id == 0) {
      printf("./bmt <Restart #> <Checkpoint interval (steps)>\n");
      printf("\n");
      printf("   Restart #:\n");
      printf("      Checkpiont id at which bmt starts\n");
      printf("   Checkpoint interval (steps):\n");
      printf("      # of Steps to skip checkpointing\n");
    }
    MPI_Finalize();
    exit(0);
  }

  /* NOTE(review): atoi() cannot report parse errors; non-numeric input
     is read as 0. */
  restart_id = atoi(argv[1]);
  interval   = atoi(argv[2]);

  hime_dbgi(0, "Checkpoint directory: %s", CHECKPOINT_DIR);
  hime_dbgi(0, "Checkpoint interval:  %d", interval);

  if (restart_id > 0) {
    hime_dbgi(0, "Restart ID:  %d", restart_id);
    restart(restart_id);
  }

  initcomm(ndx,ndy,ndz);
  it= initmax(mx,my,mz);

  /*
   *    Initializing matrixes
   */
  initmt(mx,it);

  if(id==0){
    printf("Sequential version array size\n");
    printf(" mimax = %d mjmax = %d mkmax = %d\n",MX0,MY0,MZ0);
    printf("Parallel version array size\n");
    printf(" mimax = %d mjmax = %d mkmax = %d\n",MIMAX,MJMAX,MKMAX);
    printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax);
    printf("I-decomp = %d J-decomp = %d K-decomp =%d\n",ndx,ndy,ndz);
  }

  nn= 3;
  if(id==0){
    printf(" Start rehearsal measurement process.\n");
    printf(" Measure the performance in %d times.\n\n",nn);
  }

  /* Rehearsal run; cpu receives the slowest rank's elapsed time. */
  MPI_Barrier(MPI_COMM_WORLD);
  cpu0= MPI_Wtime();
  gosa= jacobi(nn);
  cpu1= MPI_Wtime() - cpu0;

  MPI_Allreduce(&cpu1,
                &cpu,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                MPI_COMM_WORLD);

  flop= fflop(mz,my,mx);
  if(id == 0){
    printf(" MFLOPS: %f time(s): %f %e\n\n",
           mflops(nn,cpu,flop),cpu,gosa);
  }

  /*
   * Scale the 3-iteration rehearsal to roughly `target` seconds.
   * cpu is identical on every rank (Allreduce), so nn is too.
   * Guard the two degenerate cases: a zero elapsed time would divide by
   * zero, and a rehearsal longer than 3*target seconds truncates to 0.
   * Always run at least one iteration.
   */
  nn= (cpu > 0.0) ? (int)(target/(cpu/3.0)) : 0;
  if (nn < 1)
    nn= 1;

  if(id == 0){
    printf(" Now, start the actual measurement process.\n");
    printf(" The loop will be excuted in %d times\n",nn);
    printf(" This will take about one minute.\n");
    printf(" Wait for a while\n\n");
  }

  /*
   *    Start measuring
   */
  MPI_Barrier(MPI_COMM_WORLD);
  cpu0 = MPI_Wtime();
  gosa = jacobi(nn);
  cpu1 = MPI_Wtime() - cpu0;

  MPI_Allreduce(&cpu1,
                &cpu,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                MPI_COMM_WORLD);

  if(id == 0){
    printf("cpu : %f sec.\n", cpu);
    printf("Loop executed for %d times\n",nn);
    printf("Gosa : %e \n",gosa);
    printf("MFLOPS measured : %f\n",mflops(nn,cpu,flop));
    printf("Score based on Pentium III 600MHz : %f\n",
           mflops(nn,cpu,flop)/82.84);
  }


  MPI_Finalize();

  return (0);
}
예제 #4
0
파일: x_factor.c 프로젝트: 00liujj/trilinos
void X_FACTOR_ (DATA_TYPE *matrix,int *matrixsize,
   int *num_procsr, int *permute, double *secs)
{

  DATA_TYPE *mat;
  int *permutations;
  double run_secs;              /* time (in secs) during which the prog ran */
  double seconds();             /* function to generate timings */
  double tsecs;                 /* intermediate storage of timing info */

  int totmem1;
/*
   Determine who I am (me ) and the total number of nodes (nprocs_cube)
                                                                        */

  MPI_Comm_size(MPI_COMM_WORLD,&nprocs_cube);
  MPI_Comm_rank(MPI_COMM_WORLD, &me);

  permutations = permute;
  mat = matrix;

  matrix_size =  *matrixsize;
  nrows_matrix = *matrixsize;
  ncols_matrix = *matrixsize;
  nprocs_row = *num_procsr;

  totmem1=0;                      /* Initialize the total memory used */
  nprocs_col = nprocs_cube/nprocs_row;
  max_procs = (nprocs_row < nprocs_col) ? nprocs_col : nprocs_row;

    /* set up communicators for rows and columns */

    myrow = mesh_row(me);
    mycol = mesh_col(me);
    MPI_Comm_split(MPI_COMM_WORLD,myrow,mycol,&row_comm);
    MPI_Comm_split(MPI_COMM_WORLD,mycol,myrow,&col_comm);

    {int checkcol,checkrow;
     MPI_Comm_rank(col_comm, &checkrow) ;
     MPI_Comm_rank(row_comm, &checkcol) ;
     if (myrow != checkrow) {
       printf("Node %d: my row = %d but rank in col = %d\n",me,myrow,checkrow);     if (mycol != checkcol)
       printf("Node %d: my col = %d but rank in row = %d\n",me,mycol,checkcol);
     }
    }

  /* Distribution for the matrix on me */

  my_first_col = mesh_col(me);
  my_first_row = mesh_row(me);

  my_rows = nrows_matrix / nprocs_col;
  if (my_first_row < nrows_matrix % nprocs_col)
    ++my_rows;
  my_cols = ncols_matrix / nprocs_row;
  if (my_first_col < ncols_matrix % nprocs_row)
    ++my_cols;

  /* blksz paramter must be set */

  blksz = DEFBLKSZ;


  /* allocate arrays for factor/solve */


  pivot_vec = (int *) malloc(my_cols * sizeof(int));
  totmem1 += my_cols * sizeof(int);
  if (pivot_vec == NULL) {
    fprintf(stderr, "Node %d: Out of memory\n", me);
    exit(-1);
  }

  row3 = (DATA_TYPE *) malloc((my_cols +1+ blksz + nrhs) * sizeof(DATA_TYPE));
  totmem1 += (my_cols + blksz + 1) * sizeof(DATA_TYPE);
  if (row3 == NULL) {
    fprintf(stderr, "Node %d: Out of memory\n", me);
    exit(-1);
  }


  row2 = (DATA_TYPE *) malloc((my_cols + blksz+10 + nrhs) * sizeof(DATA_TYPE));
  totmem1 += (my_cols + blksz + 1) * sizeof(DATA_TYPE);
  if (row2 == NULL) {
    fprintf(stderr, "Node %d: Out of memory\n", me);
    exit(-1);
  }

  row1_stride = my_cols+blksz+1;
  row1 = (DATA_TYPE *) malloc(blksz*(my_cols+blksz+nrhs+3)*sizeof(DATA_TYPE));
  totmem1 += blksz * (my_cols + blksz + 1) * sizeof(DATA_TYPE);
  if (row1 == NULL) {
    fprintf(stderr, "Node %d: Out of memory\n", me);
    exit(-1);
  }

  col2 = (DATA_TYPE *) malloc((my_rows + 10) * sizeof(DATA_TYPE));
  totmem1 += (my_rows + 1) * sizeof(DATA_TYPE);
  if (col2 == NULL) {
    fprintf(stderr, "Node %d: Out of memory\n", me);
    exit(-1);
  }

  col1_stride = my_rows;
  col1 = (DATA_TYPE *) malloc(blksz * (my_rows + 10) * sizeof(DATA_TYPE));
  totmem1 += blksz * (my_rows + 1) * sizeof(DATA_TYPE);
  if (col1 == NULL) {
    fprintf(stderr, "Node %d: Out of memory\n", me);
    exit(-1);
  }

  mat_stride = my_rows;

  /* Factor and Solve the system */

  tsecs = seconds(0.0);
  /* Initialize Communication  */

  initcomm();
  factor(mat);

  tsecs = seconds(tsecs);

  run_secs = (double) tsecs;

  /* Solve time secs */

  *secs = run_secs;

  free(row2);

}
예제 #5
0
/*
 * Benchmark driver (MPI variant with optional SCR support).
 *
 * Initializes MPI (and SCR when SCR_ENABLE is defined), sets up the
 * process decomposition and matrices, runs a 3-iteration rehearsal of
 * jacobi(), then a measured run whose iteration count is estimated from
 * the rehearsal time. Final results go to stderr on rank 0.
 *
 * Returns 0 on completion.
 */
int
main(int argc,char *argv[])
{
  int    nn;
  int    mx,my,mz,it;
  float  gosa;
  double cpu,cpu0,cpu1,flop,target;

  target= 60.0;
  omega= 0.8;
  mx= MX0-1;
  my= MY0-1;
  mz= MZ0-1;
  ndx= NDX0;
  ndy= NDY0;
  ndz= NDZ0;

  MPI_Init(&argc, &argv);
#ifdef SCR_ENABLE
  SCR_Init();
#endif

  MPI_Comm_size(MPI_COMM_WORLD, &npe);
  MPI_Comm_rank(MPI_COMM_WORLD, &id);

  initcomm(ndx,ndy,ndz);
  it= initmax(mx,my,mz);

  /*
   *    Initializing matrixes
   */
  initmt(mx,it);

  if(id==0){
    printf("Sequential version array size\n");
    printf(" mimax = %d mjmax = %d mkmax = %d\n",MX0,MY0,MZ0);
    printf("Parallel version array size\n");
    printf(" mimax = %d mjmax = %d mkmax = %d\n",MIMAX,MJMAX,MKMAX);
    printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax);
    printf("I-decomp = %d J-decomp = %d K-decomp =%d\n",ndx,ndy,ndz);
  }

  nn= 3;
  if(id==0){
    printf(" Start rehearsal measurement process.\n");
    printf(" Measure the performance in %d times.\n\n",nn);
  }

  /* Rehearsal run; cpu receives the slowest rank's elapsed time. */
  MPI_Barrier(MPI_COMM_WORLD);
  cpu0= MPI_Wtime();
  gosa= jacobi(nn);
  cpu1= MPI_Wtime() - cpu0;

  MPI_Allreduce(&cpu1,
                &cpu,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                MPI_COMM_WORLD);

  flop= fflop(mz,my,mx);
  if(id == 0){
    printf(" MFLOPS: %f time(s): %f %e\n\n",
           mflops(nn,cpu,flop),cpu,gosa);
  }

  /*
   * Scale the 3-iteration rehearsal to roughly `target` seconds.
   * cpu is identical on every rank (Allreduce), so nn is too.
   * Guard the two degenerate cases: a zero elapsed time would divide by
   * zero, and a rehearsal longer than 3*target seconds truncates to 0.
   * Always run at least one iteration.
   */
  nn= (cpu > 0.0) ? (int)(target/(cpu/3.0)) : 0;
  if (nn < 1)
    nn= 1;

  if(id == 0){
    printf(" Now, start the actual measurement process.\n");
    printf(" The loop will be excuted in %d times\n",nn);
    printf(" This will take about one minute.\n");
    printf(" Wait for a while\n\n");
  }

  /*
   *    Start measuring
   */
  MPI_Barrier(MPI_COMM_WORLD);
  cpu0 = MPI_Wtime();
  gosa = jacobi(nn);
  cpu1 = MPI_Wtime() - cpu0;

  MPI_Allreduce(&cpu1,
                &cpu,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                MPI_COMM_WORLD);

  if(id == 0){
    fprintf(stderr, "cpu : %f sec.\n", cpu);
    fprintf(stderr, "Loop executed for %d times\n",nn);
    fprintf(stderr, "Gosa : %e \n",gosa);
    fprintf(stderr, "GFLOPS measured : %f\n",mflops(nn,cpu,flop)/1000.0);
    fprintf(stderr, "Score based on Pentium III 600MHz : %f\n",
           mflops(nn,cpu,flop)/82.84);
  }

#ifdef SCR_ENABLE
  SCR_Finalize();
#endif
  MPI_Finalize();

  return (0);
}