Example #1
void IMB_ibarrier_pure(struct comm_info* c_info,
                       int size,
                       struct iter_schedule* ITERATIONS,
                       MODES RUN_MODE,
                       double* time)
{
    int         i = 0;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0.;

#ifdef CHECK
    defect=0.;
#endif
    ierr = 0;

    if(c_info->rank != -1) {
        IMB_do_n_barriers (c_info->communicator, N_BARR);

        t_pure = MPI_Wtime();
        for(i = 0; i < ITERATIONS->n_sample; i++) {
            ierr = MPI_Ibarrier(c_info->communicator, &request);
            MPI_ERRHAND(ierr);
            MPI_Wait(&request, &status);
        }
        t_pure = (MPI_Wtime() - t_pure) / ITERATIONS->n_sample;
    }

    time[0] = t_pure;
}
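This kernel shows the timing pattern shared by every benchmark in these examples: synchronize with a few warm-up barriers, start MPI_Wtime, run n_sample repetitions of the operation, and report the average time per repetition. A minimal stand-alone sketch of the same pattern (WARMUP_BARRIERS and N_SAMPLE are illustrative stand-ins for IMB's N_BARR and iteration scheduling, not its actual values):
/* sketch_pure_timing.c -- illustrative only; not part of IMB */
#include <mpi.h>
#include <stdio.h>

#define WARMUP_BARRIERS 2     /* assumed stand-in for IMB's N_BARR        */
#define N_SAMPLE        1000  /* assumed fixed repetition count           */

int main(int argc, char **argv)
{
    int rank;
    double t;
    MPI_Request request;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* warm-up barriers, like IMB_do_n_barriers above */
    for (int i = 0; i < WARMUP_BARRIERS; i++)
        MPI_Barrier(MPI_COMM_WORLD);

    t = MPI_Wtime();
    for (int i = 0; i < N_SAMPLE; i++) {
        MPI_Ibarrier(MPI_COMM_WORLD, &request);
        MPI_Wait(&request, MPI_STATUS_IGNORE);
    }
    t = (MPI_Wtime() - t) / N_SAMPLE;   /* average seconds per repetition */

    if (rank == 0)
        printf("Ibarrier (pure): %.3f usec\n", t * 1e6);

    MPI_Finalize();
    return 0;
}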
Example #2
void IMB_barrier(struct comm_info* c_info, int size,  struct iter_schedule* ITERATIONS,
                 MODES RUN_MODE, double* time)
/*

                      
                      MPI-1 benchmark kernel
                      Benchmarks MPI_Barrier
                      


Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-size                 (type int)                      
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)                      
                      (only MPI-2 case: see [1])


Output variables: 

-time                 (type double*)                      
                      Timing result per sample


*/
{
  double t1, t2;
  int    i;

  ierr = 0;

  if(c_info->rank!=-1)
  {
      IMB_do_n_barriers (c_info->communicator, N_BARR);

      t1 = MPI_Wtime();
      for(i=0;i< ITERATIONS->n_sample;i++)
      {
          ierr= MPI_Barrier(c_info->communicator);
          MPI_ERRHAND(ierr);
      }
      t2 = MPI_Wtime();
      *time=(t2 - t1)/(ITERATIONS->n_sample);
  }
  else
  { 
      *time = 0.; 
  }
}
Example #3
void IMB_iallreduce_pure(struct comm_info* c_info,
                         int size,
                         struct iter_schedule* ITERATIONS,
                         MODES RUN_MODE,
                         double* time)
{
    int         i = 0;
    Type_Size   s_size;
    int         s_num = 0;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0.;

#ifdef CHECK
    defect=0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->red_data_type, &s_size);
    if (s_size != 0) 
    {
        s_num = size / s_size;
    }

    if(c_info->rank != -1) 
    {
        IMB_do_n_barriers (c_info->communicator, N_BARR);

        for(i = 0; i < ITERATIONS->n_sample; i++)
        {
            t_pure -= MPI_Wtime();
            ierr = MPI_Iallreduce((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                  (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                  s_num,
                                  c_info->red_data_type,
                                  c_info->op_type,
                                  c_info->communicator,
                                  &request);
            MPI_ERRHAND(ierr);
            MPI_Wait(&request, &status);
            t_pure += MPI_Wtime();

            CHK_DIFF("Iallreduce_pure", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     0, size, size, asize, put, 0, ITERATIONS->n_sample, i, -1, &defect);
            IMB_do_n_barriers (c_info->communicator, c_info->sync);
        }
        t_pure /= ITERATIONS->n_sample;
    }

    time[0] = t_pure;
}
Example #4
void IMB_ibarrier(struct comm_info* c_info,
                  int size,
                  struct iter_schedule* ITERATIONS,
                  MODES RUN_MODE,
                  double* time)
{
    int         i = 0;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0.,
                t_comp = 0.,
                t_ovrlp = 0.;

#ifdef CHECK
    defect=0.;
#endif
    ierr = 0;

    if(c_info->rank != -1) {
        IMB_ibarrier_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);

        /* INITIALIZATION CALL */
        IMB_cpu_exploit(t_pure, 1);

        IMB_do_n_barriers (c_info->communicator, N_BARR);

        t_ovrlp = MPI_Wtime();
        for(i=0; i < ITERATIONS->n_sample; i++) {
            ierr = MPI_Ibarrier(c_info->communicator, &request);
            MPI_ERRHAND(ierr);

            t_comp -= MPI_Wtime();
            IMB_cpu_exploit(t_pure, 0);
            t_comp += MPI_Wtime();

            MPI_Wait(&request, &status);
        }
        t_ovrlp = (MPI_Wtime() - t_ovrlp) / ITERATIONS->n_sample;
        t_comp /= ITERATIONS->n_sample;
    }

    time[0] = t_pure;
    time[1] = t_ovrlp;
    time[2] = t_comp;
}
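IMB_ibarrier returns three timings: t_pure (the nonblocking barrier completed immediately, from IMB_ibarrier_pure), t_ovrlp (the barrier overlapped with IMB_cpu_exploit work calibrated to take about t_pure seconds), and t_comp (the computation alone). IMB's output stage derives an overlap percentage from these three numbers; a sketch of that computation, assuming the conventional IMB definition overlap = 100 * max(0, min(1, (t_pure + t_comp - t_ovrlp) / min(t_pure, t_comp))):
/* sketch_overlap.c -- overlap metric, assuming IMB's documented formula */
#include <stdio.h>

static double min_d(double a, double b) { return a < b ? a : b; }
static double max_d(double a, double b) { return a > b ? a : b; }

/* time[0] = t_pure, time[1] = t_ovrlp, time[2] = t_comp, as filled above */
static double overlap_percent(const double time[3])
{
    double t_pure = time[0], t_ovrlp = time[1], t_comp = time[2];
    double frac = (t_pure + t_comp - t_ovrlp) / min_d(t_pure, t_comp);
    return 100.0 * max_d(0.0, min_d(1.0, frac));
}

int main(void)
{
    /* made-up sample timings in seconds */
    double time[3] = { 10.0e-6, 12.0e-6, 10.0e-6 };
    printf("overlap = %.1f %%\n", overlap_percent(time));  /* 80.0 %% here */
    return 0;
}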
Example #5
void IMB_exchange(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                  MODES RUN_MODE, double* time)
/*

                      
                      MPI-1 benchmark kernel
                      Chainwise exchange; MPI_Isend (left+right) + MPI_Recv (right+left)
                      


Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-size                 (type int)                      
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)                      
                      (only MPI-2 case: see [1])


Output variables: 

-time                 (type double*)                      
                      Timing result per sample


*/
{
  double t1, t2;
  int  i;
  
  Type_Size s_size, r_size;
  int s_num, r_num;
  int s_tag, r_tag;
  int left, right;
  MPI_Status  stat[2];
  MPI_Request request[2];
  
#ifdef CHECK 
  defect=0;
#endif
  ierr = 0;

  /*GET SIZE OF DATA TYPE's in s_size and r_size*/
  MPI_Type_size(c_info->s_data_type,&s_size);
  MPI_Type_size(c_info->r_data_type,&r_size);
  if ((s_size!=0) && (r_size!=0))
    {
      s_num=size/s_size;
      r_num=size/r_size;
    } 
  s_tag = 1;
  r_tag = c_info->select_tag ? s_tag : MPI_ANY_TAG;

  
  if(c_info->rank != -1)
    {
      if(c_info->rank < c_info->num_procs-1)   right  = c_info->rank+1;
      if(c_info->rank > 0)                     left   = c_info->rank-1;
      
      if(c_info->rank == c_info->num_procs-1)  right  = 0;
      if(c_info->rank == 0)                    left   = c_info->num_procs-1 ;
      
      if((c_info->rank >= 0) && (c_info->rank <= c_info->num_procs-1))
	{
          for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);

	  t1 = MPI_Wtime();
	  for(i=0; i< ITERATIONS->n_sample; i++)
	    { 
	      ierr= MPI_Isend((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
                              s_num,c_info->s_data_type,
			      right,s_tag,c_info->communicator,&request[0]);
	      MPI_ERRHAND(ierr);
	      ierr= MPI_Isend((char*)c_info->s_buffer+size+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
                              s_num,c_info->s_data_type,
			      left ,s_tag,c_info->communicator,&request[1]);
	      MPI_ERRHAND(ierr);

	      ierr= MPI_Recv( (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
                              r_num,c_info->r_data_type,
			      left ,r_tag,c_info->communicator,stat);
	      MPI_ERRHAND(ierr);

              CHK_DIFF("Exchange",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
                       0, size, size, 1, 
                       put, 0, ITERATIONS->n_sample, i,
                       left, &defect);

	      ierr= MPI_Recv( (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
                              r_num,c_info->r_data_type,
			      right,r_tag,c_info->communicator,stat);
	      MPI_ERRHAND(ierr);
	      
              CHK_DIFF("Exchange",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
                       s_num, size, size, 1, 
                       put, 0, ITERATIONS->n_sample, i,
                       right, &defect);

	      ierr= MPI_Waitall(2,request,stat);
	      MPI_ERRHAND(ierr);   
	    }
	  t2 = MPI_Wtime();
	  *time=(t2 - t1)/ITERATIONS->n_sample;
	}
    }
  else
    { 
      *time = 0.; 
    }
}
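The neighbour selection above spells out the wrap-around cases explicitly; the same ring topology can be written with modulo arithmetic, which may be easier to verify. A small sketch (not IMB code) computing the equivalent left/right neighbours:
/* sketch_ring_neighbours.c -- equivalent to the wrap-around if-chain above */
#include <stdio.h>

static void ring_neighbours(int rank, int num_procs, int *left, int *right)
{
    *right = (rank + 1) % num_procs;              /* successor in the ring   */
    *left  = (rank + num_procs - 1) % num_procs;  /* predecessor in the ring */
}

int main(void)
{
    int left, right;
    for (int rank = 0; rank < 4; rank++) {
        ring_neighbours(rank, 4, &left, &right);
        printf("rank %d: left=%d right=%d\n", rank, left, right);
    }
    return 0;
}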
Example #6
void IMB_allgather(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                   MODES RUN_MODE, double* time)
/*

                      
                      MPI-1 benchmark kernel
                      Benchmarks MPI_Allgather
                      


Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-size                 (type int)                      
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)                      
                      (only MPI-2 case: see [1])


Output variables: 

-time                 (type double*)                      
                      Timing result per sample


*/
{
  double t1, t2;
  int    i;

  Type_Size s_size,r_size;
  int s_num, r_num;
  
#ifdef CHECK
defect=0.;
#endif
  ierr = 0;
  /*  GET SIZE OF DATA TYPE */  
  MPI_Type_size(c_info->s_data_type,&s_size);
  MPI_Type_size(c_info->r_data_type,&r_size);
  if ((s_size!=0) && (r_size!=0))
    {
      s_num=size/s_size;
      r_num=size/r_size;
    } 
  
  if(c_info->rank!=-1)
    {
      for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);

      t1 = MPI_Wtime();
      for(i=0;i< ITERATIONS->n_sample;i++)
        {
          ierr = MPI_Allgather((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
                               s_num,c_info->s_data_type,
			       (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
                               r_num,c_info->r_data_type,
			       c_info->communicator);
          MPI_ERRHAND(ierr);

          CHK_DIFF("Allgather",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0,
                   0, (size_t) c_info->num_procs* (size_t) size, 1, 
                   put, 0, ITERATIONS->n_sample, i,
                   -2, &defect);
        }
      t2 = MPI_Wtime();
      *time=(t2 - t1)/ITERATIONS->n_sample;
    }
  else
    { 
      *time = 0.; 
    }
}
Example #7
void IMB_window(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                MODES RUN_MODE, double* time)
/*

                      
                      MPI-2 benchmark kernel
                      MPI_Win_create + MPI_Win_fence + MPI_Win_free
                      


Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-size                 (type int)                      
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule)                      
                      Repetition scheduling

-RUN_MODE             (type MODES)                      
                      Mode (aggregate/non aggregate; blocking/nonblocking);
                      see "IMB_benchmark.h" for definition


Output variables: 

-time                 (type double*)                      
                      Timing result per sample


*/
{
  double t1, t2;
  int    i, dum;

  ierr = 0;

  if(c_info->rank!=-1)
    {
      for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);

      t1 = MPI_Wtime();
      for(i=0;i< ITERATIONS->n_sample;i++)
	{
          ierr = MPI_Win_create(c_info->r_buffer,size,1,MPI_INFO_NULL,
                                c_info->communicator, &c_info->WIN);
          MPI_ERRHAND(ierr);
          ierr = MPI_Win_fence(0, c_info->WIN);
          MPI_ERRHAND(ierr);
/* July 2002 fix V2.2.1, empty window case */
          if(size>0)
          {
          ierr = MPI_Put(c_info->s_buffer, 1, c_info->s_data_type,
                         c_info->rank, 0, 1, c_info->r_data_type, c_info->WIN);
          MPI_ERRHAND(ierr);
          }

          ierr = MPI_Win_fence(0, c_info->WIN);
          MPI_ERRHAND(ierr);
	  
          ierr = MPI_Win_free(&c_info->WIN);
          MPI_ERRHAND(ierr);
	}
      t2 = MPI_Wtime();
      *time=(t2 - t1)/(ITERATIONS->n_sample);
    }
  else
    { 
      *time = 0.; 
    }
}
Example #8
void Accumulate (struct comm_info* c_info,
                 int size,int n_sample,MODES RUN_MODE,double* time)
/*************************************************************************/

/*------------------------------------------------------------
             VARIABLE |       TYPE        |   MEANING
------------------------------------------------------------
Input      : c_info   | struct comm_info* | see comm_info.h 
             size     | int               | message length in byte
	     n_sample | int               | repetition count
             RUN_MODE | MODES (typedef,   | Distinction aggregate/
                      | see Benchmark.h)  | non aggr., see docu.
                      |                   |
Output     : time     | double*           | *time: time/sample in usec
                      |                   |
In/Out     :  -       | -                 | -
                      |                   |  
------------------------------------------------------------
------------------------------------------------------------
Description: see the accompanying document
-------------------------------------------------------------*/
{
  double t1, t2;
  
  Type_Size s_size,r_size;
  int s_num, r_num;
  int s_tag, r_tag;
  int dest, source, root;
  int i;
  MPI_Status stat;


#ifdef CHECK 
  defect=0;
#endif
  ierr = 0;

  /*  GET SIZE OF DATA TYPE */  
  MPI_Type_size(c_info->red_data_type, &s_size);
  if (s_size!=0) s_num = size/s_size;

  root = (c_info->rank == 0);

  if( c_info->rank < 0 )
      *time = 0.;
  else
  {
      if( !RUN_MODE->AGGREGATE )
      {
          *time = MPI_Wtime();

          for(i=0; i<n_sample; i++)
          {
              ierr = MPI_Accumulate
                         (c_info->s_buffer, s_num, c_info->red_data_type,
                          0, i*s_num, s_num, c_info->red_data_type, c_info->op_type,
                          c_info->WIN );
              MPI_ERRHAND(ierr);

              ierr = MPI_Win_fence(0, c_info->WIN);
              MPI_ERRHAND(ierr);
#ifdef CHECK
              if( root )
              {
                  CHK_DIFF("Accumulate",c_info, (void*)(c_info->r_data+i*s_num), 0,
                           size, size, asize,
                           put, 0, n_sample, i,
                           -1, &defect);
                  ass_buf(c_info->r_buffer, 0, 0, size-1, 0);
              }
              MPI_Barrier(c_info->communicator);
#endif
          }
          *time = (MPI_Wtime()-*time)/n_sample;
      }

      if( RUN_MODE->AGGREGATE )
      {
          for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);

          *time = MPI_Wtime();

          for(i=0; i<n_sample; i++)
          {
              ierr = MPI_Accumulate
                         ((void*)(c_info->s_data+i*s_num), s_num, c_info->red_data_type,
                          0, i*s_num, s_num, c_info->red_data_type, c_info->op_type,
                          c_info->WIN );
              MPI_ERRHAND(ierr);
          }

          ierr = MPI_Win_fence(0, c_info->WIN);
          MPI_ERRHAND(ierr);

          *time = (MPI_Wtime()-*time)/n_sample;

#ifdef CHECK
          if( root )
          {
              CHK_DIFF("Accumulate",c_info, c_info->r_buffer, 0,
                       n_sample*size, n_sample*size, asize,
                       put, 0, n_sample, -1,
                       -1, &defect);
          }
#endif
      }
  }
}
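The two RUN_MODE->AGGREGATE branches above differ only in where MPI_Win_fence is placed: the non-aggregate mode closes an access epoch, and therefore measures completion, after every one-sided operation, while the aggregate mode issues all n_sample operations and closes a single epoch at the end. A stripped-down sketch of the two fence placements, using MPI_Put instead of MPI_Accumulate for brevity (the window setup, sizes, and the choice of rank 1 as the only origin are illustrative assumptions; run with at least two processes):
/* sketch_aggregate_modes.c -- fence placement for aggregate vs. non-aggregate */
#include <mpi.h>
#include <stdio.h>
#include <string.h>

#define N_SAMPLE 100
#define LEN      1024                  /* assumed message length in bytes */

int main(int argc, char **argv)
{
    int rank;
    char sbuf[LEN];
    char *rbuf;
    MPI_Win win;
    double t_non_aggr, t_aggr;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    memset(sbuf, 1, LEN);
    MPI_Alloc_mem((MPI_Aint)N_SAMPLE * LEN, MPI_INFO_NULL, &rbuf);
    MPI_Win_create(rbuf, (MPI_Aint)N_SAMPLE * LEN, 1, MPI_INFO_NULL,
                   MPI_COMM_WORLD, &win);

    /* non-aggregate: one access epoch (fence) per operation */
    MPI_Win_fence(0, win);
    t_non_aggr = MPI_Wtime();
    for (int i = 0; i < N_SAMPLE; i++) {
        if (rank == 1)   /* rank 1 writes into rank 0's window */
            MPI_Put(sbuf, LEN, MPI_BYTE, 0, (MPI_Aint)i * LEN, LEN, MPI_BYTE, win);
        MPI_Win_fence(0, win);
    }
    t_non_aggr = (MPI_Wtime() - t_non_aggr) / N_SAMPLE;

    /* aggregate: all operations issued, a single closing fence */
    t_aggr = MPI_Wtime();
    for (int i = 0; i < N_SAMPLE; i++)
        if (rank == 1)
            MPI_Put(sbuf, LEN, MPI_BYTE, 0, (MPI_Aint)i * LEN, LEN, MPI_BYTE, win);
    MPI_Win_fence(0, win);
    t_aggr = (MPI_Wtime() - t_aggr) / N_SAMPLE;

    if (rank == 0)
        printf("non-aggregate %.2f usec/op, aggregate %.2f usec/op\n",
               t_non_aggr * 1e6, t_aggr * 1e6);

    MPI_Win_free(&win);
    MPI_Free_mem(rbuf);
    MPI_Finalize();
    return 0;
}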
Example #9
void IMB_igatherv_pure(struct comm_info* c_info,
                       int size,
                       struct iter_schedule* ITERATIONS,
                       MODES RUN_MODE,
                       double* time)
/*


                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Igatherv (pure version, i.e. completed
                      immediately, without overlapping computation)



Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information


-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])


Output variables:

-time                 (type double*)
                      Timing result per sample


*/
{
    int         i = 0;
    Type_Size   s_size,
                r_size;
    int         s_num = 0,
                r_num;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0.;

#ifdef CHECK
    defect=0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type, &s_size);
    MPI_Type_size(c_info->r_data_type, &r_size);
    if ((s_size != 0) && (r_size != 0)) {
        s_num = size / s_size;
        r_num = size / r_size;
    }

    /* INITIALIZATION OF DISPLACEMENT and RECEIVE COUNTS */
    for (i = 0; i < c_info->num_procs; ++i) {
        c_info->rdispl[i] = r_num * i;
        c_info->reccnt[i] = r_num;
    }

    if(c_info->rank != -1) {
        for (i = 0; i < N_BARR; i++) {
            MPI_Barrier(c_info->communicator);
        }

        t_pure = MPI_Wtime();
        for(i = 0; i < ITERATIONS->n_sample; i++)
        {
            ierr = MPI_Igatherv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                s_num,
                                c_info->s_data_type,
                                (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                c_info->reccnt,
                                c_info->rdispl,
                                c_info->r_data_type,
                                i % c_info->num_procs, // root = round robin
                                c_info->communicator,
                                &request);
            MPI_ERRHAND(ierr);
            MPI_Wait(&request, &status);
#ifdef CHECK
            if (c_info->rank == i % c_info->num_procs) {
                CHK_DIFF("Igatherv_pure", c_info,
                         (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                         0, 0, ((size_t)c_info->num_procs * (size_t)size),
                         1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
            }
#endif // CHECK
        }
        t_pure = (MPI_Wtime() - t_pure) / ITERATIONS->n_sample;
    }

    time[0] = t_pure;
}
Example #10
void IMB_ibcast(struct comm_info* c_info,
                int size,
                struct iter_schedule* ITERATIONS,
                MODES RUN_MODE,
                double* time)
/*

                      
                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Ibcast
                      


Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-size                 (type int)                      
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)                      
                      (only MPI-2 case: see [1])


Output variables: 

-time                 (type double*)                      
                      Timing result per sample


*/
{
    int         i = 0,
                root = 0;
    Type_Size   s_size;
    int         s_num = 0;
    void*       bc_buf = NULL;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0.,
                t_comp = 0.,
                t_ovrlp = 0.; 

#ifdef CHECK
    defect=0.;
#endif  
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type, &s_size);
    if (s_size != 0) {
        s_num = size / s_size;
    }

    if(c_info->rank != -1) {
        IMB_ibcast_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);

        /* INITIALIZATION CALL */
        IMB_cpu_exploit(t_pure, 1);
        root = 0;

        for(i=0; i<N_BARR; i++) {
            MPI_Barrier(c_info->communicator);
        }

        t_ovrlp = MPI_Wtime();
        for(i=0; i < ITERATIONS->n_sample; i++)
        {
            bc_buf = (root == c_info->rank)
                   ? c_info->s_buffer
                   : c_info->r_buffer;
            ierr = MPI_Ibcast((char*)bc_buf + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                              s_num,
                              c_info->s_data_type,
                              root,
                              c_info->communicator,
                              &request);
            MPI_ERRHAND(ierr);
            t_comp -= MPI_Wtime();
            IMB_cpu_exploit(t_pure, 0);
            t_comp += MPI_Wtime();
            MPI_Wait(&request, &status);
            CHK_DIFF("Ibcast", c_info,
                     (char*)bc_buf + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                     0, size, size, 1, put, 0, ITERATIONS->n_sample, i, root, &defect);
            /* CHANGE THE ROOT NODE */
            root = (root + 1) % c_info->num_procs;
        }
        t_ovrlp = (MPI_Wtime() - t_ovrlp) / ITERATIONS->n_sample;
        t_comp /= ITERATIONS->n_sample;
    }

    time[0] = t_pure;
    time[1] = t_ovrlp;
    time[2] = t_comp;
}
Example #11
void IMB_rma_compare_and_swap (struct comm_info* c_info, int size,  
                               struct iter_schedule* iterations,
                               MODES run_mode, double* time)
{
    double res_time = -1.;
    int root = c_info->pair1;
    int s_size;
    int i;
    void *comp_b, *orig_b, *res_b; 
    MPI_Datatype data_type = MPI_INT;
    ierr = 0;
          
    if (c_info->rank < 0)
    {
        *time = res_time;
        return;
    }    
    
    MPI_Type_size(data_type,&s_size);
    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
    

    if (c_info->rank == c_info->pair0)
    {
        /* use r_buffer for all buffers required by compare_and_swap, because 
         * on all ranks r_buffer is zero-initialized in IMB_set_buf function */
        orig_b = (char*)c_info->r_buffer + s_size*2;
        comp_b = (char*)c_info->r_buffer + s_size;
        res_b  = c_info->r_buffer;
 
        MPI_Win_lock(MPI_LOCK_SHARED, root, 0, c_info->WIN);
        if (run_mode->AGGREGATE)
        {
            res_time = MPI_Wtime();
            for (i = 0; i < iterations->n_sample; i++)
            {
                ierr = MPI_Compare_and_swap(
                        (char*)orig_b + i%iterations->r_cache_iter*iterations->r_offs,
                        (char*)comp_b + i%iterations->r_cache_iter*iterations->r_offs,
                        (char*)res_b  + i%iterations->r_cache_iter*iterations->r_offs,
                        data_type, root, i%iterations->r_cache_iter*iterations->r_offs, 
                        c_info->WIN );
                MPI_ERRHAND(ierr);
            }
            ierr = MPI_Win_flush(root, c_info->WIN);
            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
        }    
        else if ( !run_mode->AGGREGATE )    
        {
            res_time = MPI_Wtime();
            for (i = 0; i < iterations->n_sample; i++)
            {
                ierr = MPI_Compare_and_swap(
                        (char*)orig_b + i%iterations->s_cache_iter*iterations->s_offs,
                        (char*)comp_b + i%iterations->s_cache_iter*iterations->s_offs,
                        (char*)res_b  + i%iterations->r_cache_iter*iterations->r_offs,
                        data_type, root, i%iterations->r_cache_iter*iterations->r_offs,
                        c_info->WIN );
                MPI_ERRHAND(ierr);

                ierr = MPI_Win_flush(root, c_info->WIN);
                MPI_ERRHAND(ierr);
            }
            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
        }
        MPI_Win_unlock(root, c_info->WIN);
    }
    MPI_Barrier(c_info->communicator);

    *time = res_time; 
    return;
}
Example #12
void IMB_accumulate (struct comm_info* c_info, int size,  struct iter_schedule* ITERATIONS,
                     MODES RUN_MODE, double* time)
/*

                      
                      MPI-2 benchmark kernel
                      Benchmarks MPI_Accumulate
                      


Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-size                 (type int)                      
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)                      
                      Mode (aggregate/non aggregate; blocking/nonblocking);
                      see "IMB_benchmark.h" for definition


Output variables: 

-time                 (type double*)                      
                      Timing result per sample


*/
{
  double t1, t2;
  
  Type_Size s_size,r_size;
  int s_num, r_num;
/* IMB 3.1 << */
  int r_off;
/* >> IMB 3.1  */
  int s_tag, r_tag;
  int dest, source, root;
  int i;
  MPI_Status stat;


#ifdef CHECK 
  defect=0;
#endif
  ierr = 0;

  /*  GET SIZE OF DATA TYPE */  
  MPI_Type_size(c_info->red_data_type, &s_size);

/* IMB 3.1 << */
  s_num = size/s_size;
  r_size = s_size;
  r_num = s_num;
  r_off = ITERATIONS->r_offs/r_size;
/* >> IMB 3.1  */

  root = (c_info->rank == 0);

  if( c_info->rank < 0 )
      *time = 0.;
  else
  {
      if( !RUN_MODE->AGGREGATE )
      {
          *time = MPI_Wtime();

          for(i=0; i<ITERATIONS->n_sample; i++)
          {
              ierr = MPI_Accumulate(
                        (char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
                        s_num, c_info->red_data_type,
                        0, i%ITERATIONS->r_cache_iter*r_off,
                        r_num, c_info->red_data_type, c_info->op_type,
                        c_info->WIN );
              MPI_ERRHAND(ierr);

              ierr = MPI_Win_fence(0, c_info->WIN);
              MPI_ERRHAND(ierr);
#ifdef CHECK
              if( root )
              {
                  CHK_DIFF("Accumulate",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
                           0, size, size, asize,
                           put, 0, ITERATIONS->n_sample, i,
                           -1, &defect);
                  IMB_ass_buf((char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0, 0, size-1, 0);
              }
              MPI_Barrier(c_info->communicator);
#endif
          }
          *time = (MPI_Wtime()-*time)/ITERATIONS->n_sample;
      }

      if( RUN_MODE->AGGREGATE )
      {
          for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);

          *time = MPI_Wtime();

#ifdef CHECK
          for(i=0; i<ITERATIONS->r_cache_iter; i++)
#else
          for(i=0; i<ITERATIONS->n_sample; i++)
#endif
          {
              ierr = MPI_Accumulate(
                        (char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
                        s_num, c_info->red_data_type,
                        0, i%ITERATIONS->r_cache_iter*r_off,
                        r_num, c_info->red_data_type, c_info->op_type,
                        c_info->WIN );
              MPI_ERRHAND(ierr);
          }

          ierr = MPI_Win_fence(0, c_info->WIN);
          MPI_ERRHAND(ierr);

          *time = (MPI_Wtime()-*time)/ITERATIONS->n_sample;

#ifdef CHECK
          if( root )
          {
              for(i=0; i<ITERATIONS->r_cache_iter; i++)
              {
                  CHK_DIFF("Accumulate", c_info, (char*)c_info->r_buffer+i*ITERATIONS->r_offs,
                           0, size, size, asize,
                           put, 0, ITERATIONS->n_sample, i,
                           -1, &defect);
              }
          }
#endif
      }
  }
}
Example #13
/*
Introduce new ITERATIONS object
*/
void IMB_output(struct comm_info* c_info, struct Bench* Bmark, MODES BMODE, 
                int header, int size, struct iter_schedule* ITERATIONS,
                double *time)
/* >> IMB 3.1  */
/*



Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-Bmark                (type struct Bench*)                      
                      (For explanation of struct Bench type:
                      describes all aspects of modes of a benchmark;
                      see [1] for more information)
                      
                      The actual benchmark
                      

-BMODE                (type MODES)                      
                      The actual benchmark mode (if relevant; only MPI-2 case, see [1])
                      

-header               (type int)                      
                      1/0 for do/don't print table headers
                      

-size                 (type int)                      
                      Benchmark message size
                      

-ITERATIONS           (type struct iter_schedule)                      
                      Benchmark repetition descr. object
                      

-time                 (type double *)                      
                      Benchmark timing outcome
                      3 numbers (min/max/average)
                      


*/
{
    double scaled_time[MAX_TIMINGS];
  
    int DO_OUT;
    int GROUP_OUT;
    int i,i_gr;
    int li_len;
    int edit_type;
  
    ierr = 0;

    DO_OUT    = (c_info->w_rank  == 0 );
    GROUP_OUT = (c_info->group_mode > 0 );

    if (DO_OUT)
    {
	/* Fix IMB_1.0.1: NULL all_times before allocation */
	IMB_v_free((void**)&all_times);

	all_times = 
	    (double*)IMB_v_alloc(c_info->w_num_procs * Bmark->Ntimes * sizeof(double), "Output 1");
#ifdef CHECK
      if(!all_defect)
      {
	  all_defect = (double*)IMB_v_alloc(c_info->w_num_procs * sizeof(double), "Output 1");
          for(i=0; i<c_info->w_num_procs; i++) all_defect[i]=0.;
      }
#endif  	  
    } /*if (DO_OUT)*/

    /* Scale the timings */
    for(i=0; i<Bmark->Ntimes;  i++)
	scaled_time[i] = time[i] * SCALE * Bmark->scale_time;


    /* collect all times  */
    ierr=MPI_Gather(scaled_time,Bmark->Ntimes,MPI_DOUBLE,all_times,Bmark->Ntimes,MPI_DOUBLE,0,MPI_COMM_WORLD);
    MPI_ERRHAND(ierr);

#ifdef CHECK      
    /* collect all defects */	      
    ierr=MPI_Gather(&defect,1,MPI_DOUBLE,all_defect,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
    MPI_ERRHAND(ierr);
#endif

    if( DO_OUT )
    {
	BTYPES type= Bmark->RUN_MODES[0].type;
	if ( Bmark->RUN_MODES[0].NONBLOCKING )
	    edit_type = 4;
	else if ( type == SingleTransfer && c_info->group_mode != 0 )
	    edit_type=0;
	else if ( type == ParallelTransfer || type == SingleTransfer )
	    edit_type=1;
	else if (type == Collective )
#ifdef MPIIO
	    edit_type=1;
#else
	    edit_type=2;
#endif
	else 
	    edit_type=3;

	if( header )
	{
	    fprintf(unit,"\n");            /* FOR GNUPLOT: CURVE SEPARATOR  */

	    if( GROUP_OUT ) {strcpy(aux_string,"&Group") ; li_len=1;}
	    else            {strcpy(aux_string,"");  li_len=0;}

	    if ( edit_type == 0 )
	    {
		li_len+=4;
		strcat(aux_string,"&#bytes&#repetitions&t[usec]&Mbytes/sec&");
	    }
	    else if ( edit_type == 1 )
	    {
		li_len+=6;
		strcat(aux_string,
		       "&#bytes&#repetitions&t_min[usec]&t_max[usec]&t_avg[usec]&Mbytes/sec&");
	    }
	    else if ( edit_type == 2 )
	    {
		li_len+=5;
		strcat(aux_string,
		       "&#bytes&#repetitions&t_min[usec]&t_max[usec]&t_avg[usec]&");
	    }
	    else if ( edit_type == 3 )
	    {
		li_len+=4;
		strcat(aux_string,
		       "&#repetitions&t_min[usec]&t_max[usec]&t_avg[usec]&");
	    }
	    else
	    {
		li_len+=6;
		strcat(aux_string,
		       "&#bytes&#repetitions&t_ovrl[usec]&t_pure[usec]&t_CPU[usec]& overlap[%]&");
	    }

#ifdef CHECK
	    if( Bmark->RUN_MODES[0].type != Sync &&
		strcmp(Bmark->name,"Window") )
	    {
		li_len+=1;
		strcat(aux_string,"&defects&");
	    }
#endif
	    IMB_make_line(li_len);

	    if( c_info->n_groups > 1) 
		fprintf(unit,"# Benchmarking Multi-%s ",Bmark->name);
	    else
		fprintf(unit,"# Benchmarking %s ",Bmark->name);

	    IMB_show_procids(c_info); 

	    IMB_make_line(li_len);

	    switch(BMODE->AGGREGATE)
	    {
		case 1:
		    fprintf(unit,"#\n#    MODE: AGGREGATE \n#\n");
		    break;

		case 0:
		    fprintf(unit,"#\n#    MODE: NON-AGGREGATE \n#\n");
		    break;
	    }

	    IMB_print_headlines(aux_string);
	} /*if( header )*/

	if( GROUP_OUT )
	{

	    /* IMB 3.1 << use ITERATIONS object */
	    for( i_gr=0; i_gr<c_info->n_groups; i_gr++ )
	    {
		if(i_gr == 0) fprintf(unit,"\n");

		IMB_display_times(Bmark, all_times, c_info, i_gr, ITERATIONS->n_sample, size, edit_type);
	    } /*for( i_gr=0; */
	}
	else
	    IMB_display_times(Bmark, all_times, c_info,  0, ITERATIONS->n_sample, size, edit_type);
    } /*if( DO_OUT )*/
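The edit_type chosen above controls which columns appear: single/parallel transfer benchmarks get timing plus a Mbytes/sec column, collectives get t_min/t_max/t_avg, synchronization benchmarks drop the #bytes column, and nonblocking benchmarks report t_ovrl/t_pure/t_CPU and the overlap percentage. A sketch of the figure behind the Mbytes/sec column, assuming the classic IMB convention that 1 MByte is 2^20 bytes and timings are in microseconds:
/* sketch_throughput.c -- Mbytes/sec column, assuming the classic IMB
 * convention: 1 MByte = 2^20 bytes, time reported in microseconds      */
#include <stdio.h>

static double mbytes_per_sec(double bytes, double t_usec)
{
    return (bytes / (1 << 20)) / (t_usec * 1.0e-6);
}

int main(void)
{
    /* made-up example: 4 MiB moved in 1000 usec -> 4096 MBytes/sec */
    printf("%.2f MBytes/sec\n", mbytes_per_sec(4.0 * (1 << 20), 1000.0));
    return 0;
}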
Example #14
/*
Major reconstruction of memory management for -off_cache flag
*/
void IMB_init_buffers_iter(struct comm_info* c_info, struct iter_schedule* ITERATIONS, 
                           struct Bench* Bmark, MODES BMODE, int iter, int size)
/*

                      
                      Initializes communications buffers (call set_buf)
                      Initializes iterations scheduling


Input variables: 


-Bmark                (type struct Bench*)                      
                      (For explanation of struct Bench type:
                      describes all aspects of modes of a benchmark;
                      see [1] for more information)
                      
                      Current benchmark

-BMODE                (type MODES)
                      aggregate / non aggregate
                      
-iter                 (type int)
                      number of current iteration of message size loop

-size                 (type int)                      
                      Message size
                      

In/out variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      
                      Communications buffers are allocated and assigned values

-ITERATIONS           (type struct iter_schedule*)
                      Adaptive number of iterations and out-of-cache scheduling
                      are set up if requested
                      


*/
/* >> IMB 3.1  */
{
/* IMB 3.1 << */
    size_t s_len, r_len, s_alloc, r_alloc;
    int init_size, irep, i_s, i_r, x_sample;

    x_sample = BMODE->AGGREGATE ? ITERATIONS->msgspersample : ITERATIONS->msgs_nonaggr;

    /* July 2002 fix V2.2.1: */
#if (defined EXT || defined MPIIO)
    if( Bmark->access==no ) x_sample=ITERATIONS->msgs_nonaggr;
#endif

    if ( size>0 )
	ITERATIONS->n_sample =max(1,min(ITERATIONS->overall_vol/size,x_sample));
    else ITERATIONS->n_sample = x_sample ;

    Bmark->sample_failure=0;

    init_size = max(size,asize);

    if(c_info->rank < 0) return;

    if(!strcmp(Bmark->name,"Alltoall") || !strcmp(Bmark->name,"Alltoallv"))
    {
	s_len = (size_t) c_info->num_procs*init_size;
	r_len = (size_t) c_info->num_procs*init_size;
    }
    else if( !strcmp(Bmark->name,"Allgather") || !strcmp(Bmark->name,"Allgatherv")
	     ||!strcmp(Bmark->name,"Gather") || !strcmp(Bmark->name,"Gatherv") )
    {
	s_len = (size_t) init_size;
	r_len = (size_t) c_info->num_procs*init_size;
    }
    else if( !strcmp(Bmark->name,"Exchange") )
    {
	s_len = (size_t) 2*init_size;
	r_len = (size_t) init_size;
    }
    else if( !strcmp(Bmark->name,"Scatter") || !strcmp(Bmark->name,"Scatterv") )
    {
	s_len = (size_t) c_info->num_procs*init_size;
	r_len = (size_t) init_size;
    }
    else if( !strcmp(Bmark->name,"Barrier") || /*!strcmp(Bmark->name,"Window") ||*/ !strcmp(Bmark->name,"Open_Close") )
    {
	s_len = r_len = 0;
    }
    else
	s_len = r_len = (size_t) init_size;


    /*===============================================*/
    /* The displacements are declared as int by the MPI-1 standard.
       If c_info->num_procs*init_size exceeds INT_MAX, this sample cannot be run.
     */
    if( !strcmp(Bmark->name,"Alltoallv")  || 
	!strcmp(Bmark->name,"Allgatherv") ||
	!strcmp(Bmark->name,"Scatterv")	  ||
	!strcmp(Bmark->name,"Gatherv"))
    {
	if( s_len > INT_MAX || r_len > INT_MAX)
	{
	    Bmark->sample_failure=SAMPLE_FAILED_INT_OVERFLOW;
	    return;
	}
    }
    /*===============================================*/

    /* IMB 3.1: new memory management for -off_cache */
    if( BMODE->type == Sync ) 
    {
	ITERATIONS->use_off_cache=0;
	ITERATIONS->n_sample=x_sample;
    }
    else
    {
#ifdef MPIIO
	ITERATIONS->use_off_cache=0;
#else  
	ITERATIONS->use_off_cache=ITERATIONS->off_cache;
#endif  

	/*ITERATIONS->use_off_cache=ITERATIONS->off_cache;*/

	if( ITERATIONS->off_cache ) 
	{

	    if( ITERATIONS->cache_size>0 )
	    {
		size_t	cls = (size_t) ITERATIONS->cache_line_size;
		size_t  ofs;

		ofs = ( ( s_len + cls -1 )/cls + 1 )*cls;
		ITERATIONS->s_offs = ofs;
		ITERATIONS->s_cache_iter = min(ITERATIONS->n_sample,(2*ITERATIONS->cache_size*CACHE_UNIT+ofs-1)/ofs);
		ofs = ( ( r_len + cls -1 )/cls + 1 )*cls;
		ITERATIONS->r_offs = ofs;
		ITERATIONS->r_cache_iter = min(ITERATIONS->n_sample,(2*ITERATIONS->cache_size*CACHE_UNIT+ofs-1)/ofs);
	    }
	    else
	    {
		ITERATIONS->s_offs=ITERATIONS->r_offs=0;
		ITERATIONS->s_cache_iter=ITERATIONS->r_cache_iter=1;
	    }

	}	

    }

#ifdef MPIIO
    s_alloc = s_len;
    r_alloc = r_len;
#else
    if( ITERATIONS->use_off_cache ) 
    {
	s_alloc = max(s_len,ITERATIONS->s_cache_iter*ITERATIONS->s_offs);
	r_alloc = max(r_len,ITERATIONS->r_cache_iter*ITERATIONS->r_offs);
    }
    else
    {
	s_alloc = s_len;
	r_alloc = r_len;
    }
#endif

    c_info->used_mem = 1.f*(s_alloc+r_alloc)/MEM_UNIT;

#ifdef DEBUG 
    {
	size_t mx, mu;

	mx = (size_t) MEM_UNIT*c_info->max_mem;
	mu = (size_t) MEM_UNIT*c_info->used_mem;

	DBG_I3("Got send / recv lengths; iters ",s_len,r_len,ITERATIONS->n_sample);
	DBG_I2("max  / used memory ",mx,mu);
	DBG_I2("send / recv offsets ",ITERATIONS->s_offs, ITERATIONS->r_offs);
	DBG_I2("send / recv cache iterations ",ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter); 
	DBG_I2("send / recv buffer allocations ",s_alloc, r_alloc);
	DBGF_I2("Got send / recv lengths ",s_len,r_len);
	DBGF_I2("max  / used memory ",mx,mu);
	DBGF_I2("send / recv offsets ",ITERATIONS->s_offs, ITERATIONS->r_offs);
	DBGF_I2("send / recv cache iterations ",ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter); 
	DBGF_I2("send / recv buffer allocations ",s_alloc, r_alloc);
    }
#endif

    if( s_alloc + r_alloc > c_info->max_mem*MEM_UNIT )
    {
	Bmark->sample_failure=SAMPLE_FAILED_MEMORY;
    }
    else 
    {

	if( ITERATIONS->use_off_cache )
	{

	    if( s_alloc > 0  && r_alloc > 0)
	    {
		IMB_alloc_buf(c_info, "IMB_init_buffers_iter 1", s_alloc, r_alloc);
		IMB_set_buf(c_info, c_info->rank, 0, s_len-1, 0, r_len-1);

		for( irep=1; irep<ITERATIONS->s_cache_iter; irep++)
		{
		    i_s=irep%ITERATIONS->s_cache_iter;
		    memcpy((void*)((char*)c_info->s_buffer+i_s*ITERATIONS->s_offs),c_info->s_buffer, s_len);
		}

		for( irep=1; irep<ITERATIONS->r_cache_iter; irep++)
		{
		    i_r=irep%ITERATIONS->r_cache_iter;
		    memcpy((void*)((char*)c_info->r_buffer+i_r*ITERATIONS->r_offs),c_info->r_buffer, r_len);
		}

	    }
	}
	else
	{
	    if( s_alloc > 0  && r_alloc > 0)
	    {
		IMB_set_buf(c_info, c_info->rank, 0, s_alloc-1, 0, r_alloc-1);
	    }
	    
	}

	IMB_init_transfer(c_info, Bmark, size, (MPI_Aint) max(s_alloc, r_alloc));

	/* Determine #iterations if dynamic adaptation requested */
	if( ITERATIONS->iter_dyn )
	{
	    double time[2];
	    int selected_n_sample;

	    int rep_test, acc_rep_test, t_sample;

	    selected_n_sample=ITERATIONS->n_sample;

	    if( iter==0 || BMODE->type == Sync) 
	    {
		ITERATIONS->n_sample_prev=ITERATIONS->msgspersample;

		if( c_info->n_lens> 0)
		{
		    int i;
		    for(i=0; i<c_info->n_lens; i++) ITERATIONS->numiters[i]=0;
		}
	    }

	    rep_test=1;

	    ITERATIONS->n_sample=rep_test;

	    time[0]=time[1]=0;

	    /* first, run 1 iteration only */
#ifdef MPI1
	    c_info->select_source = Bmark->select_source;
#endif
	    Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);

	    time[1] = time[0];

#ifdef MPIIO
	    if( Bmark->access != no)
	    {
        	    ierr = MPI_File_seek(c_info->fh, 0 ,MPI_SEEK_SET);
        	    MPI_ERRHAND(ierr);

		    if( Bmark->fpointer == shared)
		    {
    			ierr = MPI_File_seek_shared(c_info->fh, 0 ,MPI_SEEK_SET);
			MPI_ERRHAND(ierr);
		    }
	    }
#endif /*MPIIO*/

	    MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);

	    /* determine rough #repetitions for a run time of 1 sec */

	    if( time[0] < 0.001 )
	    {
		rep_test=1000;
	    }
	    else if( time[0]<1. )
	    {
		rep_test = (int) (1./time[0]+.5);
	    }

	    MPI_Allreduce(&rep_test, &acc_rep_test, 1, MPI_INT, MPI_MAX, c_info->communicator);

	    ITERATIONS->n_sample=min(selected_n_sample,acc_rep_test);

	    if( ITERATIONS->n_sample>1 ) 
	    {
#ifdef MPI1
		c_info->select_source = Bmark->select_source;
#endif
		Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);
		time[1] = time[0];
		
#ifdef MPIIO
	    if( Bmark->access != no)
	    {
        	    ierr = MPI_File_seek(c_info->fh, 0 ,MPI_SEEK_SET);
        	    MPI_ERRHAND(ierr);

		    if( Bmark->fpointer == shared)
		    {
    			ierr = MPI_File_seek_shared(c_info->fh, 0 ,MPI_SEEK_SET);
			MPI_ERRHAND(ierr);
		    }
	    }
#endif /*MPIIO*/
		
		MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);
	    }

	    if( time[0] > 1.e-8 )
	    {
		float val = (float) (1+ITERATIONS->secs/time[0]);
		t_sample = (val<= (float) 0x7fffffff) ? (int) val :  selected_n_sample;
	    }
	    else
	    {
		t_sample = selected_n_sample;
	    }

	    if( c_info->n_lens>0 && BMODE->type != Sync)
	    {
		// check monotonicity with msg sizes 
		int it;
		for(it=0; it<iter; it++)
		{
		    if( c_info->msglen[it] < size ) t_sample = min(t_sample,ITERATIONS->numiters[it]);
		    else                            t_sample = max(t_sample,ITERATIONS->numiters[it]);
		}

		ITERATIONS->n_sample = ITERATIONS->numiters[iter] = min( selected_n_sample,t_sample );
	    }
	    else
	    {
		ITERATIONS->n_sample = min( selected_n_sample, min( ITERATIONS->n_sample_prev, t_sample ) );
	    }

	    MPI_Bcast(&ITERATIONS->n_sample, 1, MPI_INT, 0, c_info->communicator);

#ifdef DEBUG
	    {
		int usec = (int)(time[0]*1000000);

		DBGF_I2("Checked time with #iters / usec ",acc_rep_test,usec);
		DBGF_I1("=> # samples, aligned with previous ",t_sample);
		DBGF_I1("final #samples ",ITERATIONS->n_sample);
	    }
#endif
	} /*if( ITERATIONS->iter_dyn )*/

	ITERATIONS->n_sample_prev=ITERATIONS->n_sample;

  } /*if (!( s_alloc + r_alloc > c_info->max_mem*MEM_UNIT ))*/


/* >> IMB 3.1  */

}
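The dynamic part above (ITERATIONS->iter_dyn) times a single repetition, takes the maximum across ranks, extrapolates how many repetitions fit into roughly one second, reruns with that count, and finally sizes n_sample so the whole sample stays within ITERATIONS->secs without exceeding the configured maximum. A condensed stand-alone sketch of that estimation logic (run_once() and estimate_n_sample() are hypothetical helpers for illustration, not IMB functions):
/* sketch_dynamic_iters.c -- condensed repetition-count estimation */
#include <mpi.h>
#include <stdio.h>

static double run_once(void)
{
    double t = MPI_Wtime();
    MPI_Barrier(MPI_COMM_WORLD);   /* stand-in for one benchmarked repetition */
    return MPI_Wtime() - t;
}

static int estimate_n_sample(int selected_n_sample, double target_secs)
{
    double t_local = run_once(), t;
    int rep_test = 1, acc_rep_test, n = selected_n_sample;

    /* the slowest rank decides, as the MPI_Allreduce(MPI_MAX) above does */
    MPI_Allreduce(&t_local, &t, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);

    /* rough repetition count for a ~1 second probe, as in the original */
    if (t < 0.001)
        rep_test = 1000;
    else if (t < 1.0)
        rep_test = (int)(1.0 / t + 0.5);
    MPI_Allreduce(&rep_test, &acc_rep_test, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);

    /* final count: fill target_secs, never exceeding the configured maximum */
    if (t > 1.e-8) {
        double val = 1.0 + target_secs / t;
        if (val < (double)n)
            n = (int)val;
    }
    if (n > acc_rep_test)
        n = acc_rep_test;
    return n > 0 ? n : 1;
}

int main(int argc, char **argv)
{
    int rank, n;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    n = estimate_n_sample(1000, 10.0);
    if (rank == 0)
        printf("chosen n_sample = %d\n", n);
    MPI_Finalize();
    return 0;
}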
Example #15
void IMB_alloc_buf(struct comm_info* c_info, char* where, size_t s_len,
                   size_t r_len)
/*


                      Allocates send/recv buffers for message passing



Input variables:

-where                (type char*)
                      Comment (marker for calling place)


-s_len                (type size_t)
                      Send buffer length (bytes)


-r_len                (type size_t)
                      Recv buffer length (bytes)



In/out variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

                      Send/Recv buffer components get allocated



*/
{
    /* July 2002 V2.2.1 change: use MPI_Alloc_mem */
#if ( defined EXT || defined MPIIO || RMA )
    MPI_Aint slen = (MPI_Aint)(max(1,s_len));
    MPI_Aint rlen = (MPI_Aint)(max(1,r_len));
    int ierr;
#else
    s_len=max(1,s_len);
    r_len=max(1,r_len);
#endif

    if( c_info->s_alloc < s_len )
    {
        /* July 2002 V2.2.1 change: use MPI_Alloc_mem */
#if ( defined EXT || defined MPIIO || RMA)
        if (c_info->s_buffer)
            MPI_Free_mem(c_info->s_buffer);

        ierr=MPI_Alloc_mem(slen, MPI_INFO_NULL, &c_info->s_buffer);
        MPI_ERRHAND(ierr);
        c_info->s_alloc = slen;
#else
        IMB_v_free((void**)&c_info->s_buffer);

        c_info->s_buffer = IMB_v_alloc(s_len,where);
        c_info->s_alloc = s_len;
#endif

        c_info->s_data  = (assign_type*)c_info->s_buffer;
    }

    if( c_info->r_alloc < r_len )
    {
        /* July 2002 V2.2.1 change: use MPI_Alloc_mem */
#if ( defined EXT || defined MPIIO || RMA)
        if (c_info->r_buffer)
            MPI_Free_mem(c_info->r_buffer);

        ierr=MPI_Alloc_mem(rlen, MPI_INFO_NULL, &c_info->r_buffer);
        MPI_ERRHAND(ierr);
        c_info->r_alloc = rlen;
#else
        IMB_v_free((void**)&c_info->r_buffer);

        c_info->r_buffer = IMB_v_alloc(r_len,where);
        c_info->r_alloc = r_len;
#endif

        c_info->r_data = (assign_type*)c_info->r_buffer;
    }
}
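For the one-sided builds (EXT, MPIIO, RMA) the benchmark buffers come from MPI_Alloc_mem, which lets the MPI library hand out memory that is registered or otherwise well suited for RMA windows; the other builds fall back to the benchmark's own allocator. A minimal sketch of the MPI_Alloc_mem / MPI_Free_mem pairing used above (the 1 MiB size is an illustrative assumption):
/* sketch_alloc_mem.c -- MPI_Alloc_mem / MPI_Free_mem pairing */
#include <mpi.h>
#include <string.h>

int main(int argc, char **argv)
{
    void *buf = NULL;
    MPI_Aint len = 1 << 20;    /* 1 MiB, illustrative size */

    MPI_Init(&argc, &argv);

    /* memory suitable for RMA windows; may come from registered pages */
    MPI_Alloc_mem(len, MPI_INFO_NULL, &buf);
    memset(buf, 0, (size_t)len);

    /* ... create a window over buf, run the kernel, free the window ... */

    MPI_Free_mem(buf);
    MPI_Finalize();
    return 0;
}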
Example #16
void IMB_rma_accumulate (struct comm_info* c_info, int size,  
                         struct iter_schedule* iterations,
                         MODES run_mode, double* time)
{
    double res_time = -1.;
    Type_Size s_size,r_size;
    int s_num, r_num;
    /* IMB 3.1 << */
    int r_off;
    int i;
    int root = c_info->pair1;
    ierr = 0;
     
    if (c_info->rank < 0)
    {
        *time = res_time;
        return;
    }    
    
    MPI_Type_size(c_info->red_data_type,&s_size);
    s_num=size/s_size;
    r_size=s_size;
    r_num=s_num;
    r_off=iterations->r_offs/r_size;

    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);

    if (c_info->rank == c_info->pair0)
    {
        MPI_Win_lock(MPI_LOCK_SHARED, root, 0, c_info->WIN);
        if (run_mode->AGGREGATE)
        {
            res_time = MPI_Wtime();
            for (i = 0; i < iterations->n_sample; i++)
            {
                ierr = MPI_Accumulate(
                        (char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
                        s_num, c_info->red_data_type, root, 
                        i%iterations->r_cache_iter*r_off, r_num, 
                        c_info->red_data_type, c_info->op_type, c_info->WIN );
                MPI_ERRHAND(ierr);
            }
            ierr = MPI_Win_flush(root, c_info->WIN);
            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
        }    
        else if ( !run_mode->AGGREGATE )    
        {
            res_time = MPI_Wtime();
            for (i = 0; i < iterations->n_sample; i++)
            {
                ierr = MPI_Accumulate(
                        (char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
                        s_num, c_info->red_data_type, root, 
                        i%iterations->r_cache_iter*r_off, r_num, 
                        c_info->red_data_type, c_info->op_type, c_info->WIN );
                MPI_ERRHAND(ierr);

                ierr = MPI_Win_flush(root, c_info->WIN);
                MPI_ERRHAND(ierr);
            }
            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
        }
        MPI_Win_unlock(root, c_info->WIN);
    }
    MPI_Barrier(c_info->communicator);

    *time = res_time; 
    return;
}    
Example #17
/*
Major reconstruction of memory management for -off_cache flag
*/
void IMB_init_buffers_iter(struct comm_info* c_info, struct iter_schedule* ITERATIONS,
                           struct Bench* Bmark, MODES BMODE, int iter, int size)
/*


                      Initializes communications buffers (call set_buf)
                      Initializes iterations scheduling


Input variables:


-Bmark                (type struct Bench*)
                      (For explanation of struct Bench type:
                      describes all aspects of modes of a benchmark;
                      see [1] for more information)

                      Current benchmark

-BMODE                (type MODES)
                      aggregate / non aggregate

-iter                 (type int)
                      number of current iteration of message size loop

-size                 (type int)
                      Message size


In/out variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

                      Communications buffers are allocated and assigned values

-ITERATIONS           (type struct iter_schedule*)
                      Adaptive number of iterations and out-of-cache scheduling
                      are set up if requested



*/
/* >> IMB 3.1  */
{
    /* IMB 3.1 << */
    size_t s_len, r_len, s_alloc, r_alloc;
    int init_size, irep, i_s, i_r, x_sample;
    const int root_based = has_root(Bmark->name);


    x_sample = BMODE->AGGREGATE ? ITERATIONS->msgspersample : ITERATIONS->msgs_nonaggr;

    /* July 2002 fix V2.2.1: */
#if (defined EXT || defined MPIIO || RMA)
    if( Bmark->access==no ) x_sample=ITERATIONS->msgs_nonaggr;
#endif

    ITERATIONS->n_sample = (size > 0)
                           ? max(1, min(ITERATIONS->overall_vol / size, x_sample))
                           : x_sample;

    Bmark->sample_failure = 0;

    init_size = max(size, asize);

    if (c_info->rank < 0) {
        return;
    } else {

        if (ITERATIONS->iter_policy == imode_off) {
            ITERATIONS->n_sample = x_sample = ITERATIONS->msgspersample;
        } else if ((ITERATIONS->iter_policy == imode_multiple_np) || (ITERATIONS->iter_policy == imode_auto && root_based)) {
            /* n_sample for benchmarks with an uneven distribution of work
               must be at least num_procs and a multiple of it.
               The formula below is the negative branch of a hyperbola,
               shifted and scaled by the maximum message size and the
               initial n_sample, then rounded down to a multiple of num_procs.
            */
            double d_n_sample = ITERATIONS->msgspersample;
            int max_msg_size = 1<<c_info->max_msg_log;
            int tmp = (int)(d_n_sample*max_msg_size/(c_info->num_procs*init_size+max_msg_size)+0.5);
            ITERATIONS->n_sample = x_sample = max(tmp-tmp%c_info->num_procs, c_info->num_procs);
        } /* else as is */
    }
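    /* Worked example of the imode_multiple_np formula above (illustrative
       numbers, not defaults): with msgspersample = 10000, num_procs = 16,
       max_msg_log = 22 (max_msg_size = 4 MiB) and init_size = 4 MiB,
       tmp = (int)(10000*4Mi/(16*4Mi + 4Mi) + 0.5) = 588, and rounding down
       to a multiple of num_procs gives n_sample = 576. Small messages keep
       n_sample near 10000, the largest sizes shrink it, and it never drops
       below num_procs. */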

    if (
#ifdef MPI1
        !strcmp(Bmark->name,"Alltoall") || !strcmp(Bmark->name,"Alltoallv")
#elif defined NBC // MPI1
        !strcmp(Bmark->name, "Ialltoall")  || !strcmp(Bmark->name, "Ialltoall_pure")
        || !strcmp(Bmark->name, "Ialltoallv") || !strcmp(Bmark->name, "Ialltoallv_pure")
#else
        0
#endif // NBC // MPI1
    )
    {
        s_len = (size_t)c_info->num_procs * (size_t)init_size;
        r_len = (size_t)c_info->num_procs * (size_t)init_size;
    }
    else if (
#ifdef MPI1
        !strcmp(Bmark->name, "Allgather")   || !strcmp(Bmark->name, "Allgatherv")
        || !strcmp(Bmark->name, "Gather")      || !strcmp(Bmark->name, "Gatherv")
#elif defined NBC
        !strcmp(Bmark->name, "Iallgather")  || !strcmp(Bmark->name, "Iallgather_pure")
        || !strcmp(Bmark->name, "Iallgatherv") || !strcmp(Bmark->name, "Iallgatherv_pure")
        || !strcmp(Bmark->name, "Igather")     || !strcmp(Bmark->name, "Igather_pure")
        || !strcmp(Bmark->name, "Igatherv")    || !strcmp(Bmark->name, "Igatherv_pure")
#else // MPI1 // NBC
        0
#endif // MPI1 // NBC
    )
    {
        s_len = (size_t) init_size;
        r_len = (size_t) c_info->num_procs * (size_t)init_size;
    }
    else if( !strcmp(Bmark->name,"Exchange") )
    {
        s_len = 2 * (size_t)init_size;
        r_len = (size_t) init_size;
    }
    else if(
#ifdef MPI1
        !strcmp(Bmark->name,"Scatter") || !strcmp(Bmark->name,"Scatterv")
#elif defined NBC // MPI1
        !strcmp(Bmark->name,"Iscatter")  || !strcmp(Bmark->name,"Iscatter_pure")
        || !strcmp(Bmark->name,"Iscatterv") || !strcmp(Bmark->name,"Iscatterv_pure")
#else // NBC // MPI1
        0
#endif // NBC // MPI1
    )
    {
        s_len = (size_t)c_info->num_procs * (size_t)init_size;
        r_len = (size_t)init_size;
    } else if( !strcmp(Bmark->name,"Barrier") || /*!strcmp(Bmark->name,"Window") ||*/ !strcmp(Bmark->name,"Open_Close") ) {
        s_len = r_len = 0;
    }
    else if ( ! strcmp(Bmark->name,"Exchange_put") || ! strcmp(Bmark->name,"Exchange_get") )
    {
        s_len = 2 * (size_t)init_size;
        r_len = 2 * (size_t)init_size;
    }
    else if (! strcmp(Bmark->name,"Compare_and_swap") )
    {
        /* Compare_and_swap operations require 3 buffers, so allocate space for compare
         * buffers in our r_buffer */
        s_len = (size_t)init_size;
        r_len = 3 * (size_t)init_size;
    }
    else
    {
        s_len = r_len = (size_t) init_size;
    }

    /*===============================================*/
    /* The displacements are declared as int by the MPI-1 standard.
       If c_info->num_procs*init_size exceeds INT_MAX, this sample cannot be run.
     */
    if (
#ifdef MPI1
        !strcmp(Bmark->name,"Alltoallv")  ||
        !strcmp(Bmark->name,"Allgatherv") ||
        !strcmp(Bmark->name,"Scatterv")   ||
        !strcmp(Bmark->name,"Gatherv")
#elif defined NBC // MPI1
        !strcmp(Bmark->name,"Ialltoallv")  || !strcmp(Bmark->name,"Ialltoallv_pure")  ||
        !strcmp(Bmark->name,"Iallgatherv") || !strcmp(Bmark->name,"Iallgatherv_pure") ||
        !strcmp(Bmark->name,"Iscatterv")   || !strcmp(Bmark->name,"Iscatterv_pure")   ||
        !strcmp(Bmark->name,"Igatherv")    || !strcmp(Bmark->name,"Igatherv_pure")
#else // NBC // MPI1
        0
#endif // NBC // MPI1
    )
    {
        if( s_len > INT_MAX || r_len > INT_MAX) {
            Bmark->sample_failure = SAMPLE_FAILED_INT_OVERFLOW;
            return;
        }
    }
    /*===============================================*/

    /* IMB 3.1: new memory management for -off_cache */
    if (BMODE->type == Sync) {
        ITERATIONS->use_off_cache=0;
        ITERATIONS->n_sample=x_sample;
    } else {
#ifdef MPIIO
        ITERATIONS->use_off_cache=0;
#else
        ITERATIONS->use_off_cache = ITERATIONS->off_cache;
#endif
        if (ITERATIONS->off_cache) {
            if ( ITERATIONS->cache_size > 0) {
                size_t cls = (size_t) ITERATIONS->cache_line_size;
                size_t ofs = ( (s_len + cls - 1) / cls + 1 ) * cls;
                ITERATIONS->s_offs = ofs;
                ITERATIONS->s_cache_iter = min(ITERATIONS->n_sample,(2*ITERATIONS->cache_size*CACHE_UNIT+ofs-1)/ofs);
                ofs = ( ( r_len + cls -1 )/cls + 1 )*cls;
                ITERATIONS->r_offs = ofs;
                ITERATIONS->r_cache_iter = min(ITERATIONS->n_sample,(2*ITERATIONS->cache_size*CACHE_UNIT+ofs-1)/ofs);
            } else {
                ITERATIONS->s_offs=ITERATIONS->r_offs=0;
                ITERATIONS->s_cache_iter=ITERATIONS->r_cache_iter=1;
            }
        }
    }

#ifdef MPIIO
    s_alloc = s_len;
    r_alloc = r_len;
#else
    if( ITERATIONS->use_off_cache ) {
        s_alloc = max(s_len,ITERATIONS->s_cache_iter*ITERATIONS->s_offs);
        r_alloc = max(r_len,ITERATIONS->r_cache_iter*ITERATIONS->r_offs);
    } else {
        s_alloc = s_len;
        r_alloc = r_len;
    }
#endif

    c_info->used_mem = 1.f*(s_alloc+r_alloc)/MEM_UNIT;

#ifdef DEBUG
    {
        size_t mx, mu;

        mx = (size_t) MEM_UNIT*c_info->max_mem;
        mu = (size_t) MEM_UNIT*c_info->used_mem;

        DBG_I3("Got send / recv lengths; iters ",s_len,r_len,ITERATIONS->n_sample);
        DBG_I2("max  / used memory ",mx,mu);
        DBG_I2("send / recv offsets ",ITERATIONS->s_offs, ITERATIONS->r_offs);
        DBG_I2("send / recv cache iterations ",ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter);
        DBG_I2("send / recv buffer allocations ",s_alloc, r_alloc);
        DBGF_I2("Got send / recv lengths ",s_len,r_len);
        DBGF_I2("max  / used memory ",mx,mu);
        DBGF_I2("send / recv offsets ",ITERATIONS->s_offs, ITERATIONS->r_offs);
        DBGF_I2("send / recv cache iterations ",ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter);
        DBGF_I2("send / recv buffer allocations ",s_alloc, r_alloc);
    }
#endif

    if( c_info->used_mem > c_info->max_mem ) {
        Bmark->sample_failure=SAMPLE_FAILED_MEMORY;
        return;
    }

    if (s_alloc > 0  && r_alloc > 0) {
        if (ITERATIONS->use_off_cache) {
            IMB_alloc_buf(c_info, "IMB_init_buffers_iter 1", s_alloc, r_alloc);
            IMB_set_buf(c_info, c_info->rank, 0, s_len-1, 0, r_len-1);

            for (irep = 1; irep < ITERATIONS->s_cache_iter; irep++) {
                i_s = irep % ITERATIONS->s_cache_iter;
                memcpy((void*)((char*)c_info->s_buffer + i_s * ITERATIONS->s_offs), c_info->s_buffer, s_len);
            }

            for (irep = 1; irep < ITERATIONS->r_cache_iter; irep++) {
                i_r = irep % ITERATIONS->r_cache_iter;
                memcpy((void*)((char*)c_info->r_buffer + i_r * ITERATIONS->r_offs), c_info->r_buffer, r_len);
            }
        } else {
            IMB_set_buf(c_info, c_info->rank, 0, s_alloc-1, 0, r_alloc-1);
        }
    }

    IMB_init_transfer(c_info, Bmark, size, (MPI_Aint) max(s_alloc, r_alloc));

    /* Determine #iterations if dynamic adaptation requested */
    if ((ITERATIONS->iter_policy == imode_dynamic) || (ITERATIONS->iter_policy == imode_auto && !root_based)) {
        double time[MAX_TIME_ID];
        int acc_rep_test, t_sample;
        int selected_n_sample = ITERATIONS->n_sample;

        memset(time, 0, sizeof(time)); /* zero all MAX_TIME_ID doubles, not just MAX_TIME_ID bytes */
        if (iter == 0 || BMODE->type == Sync) {
            ITERATIONS->n_sample_prev = ITERATIONS->msgspersample;
            if (c_info->n_lens > 0) {
                memset(ITERATIONS->numiters, 0, c_info->n_lens * sizeof(*ITERATIONS->numiters));
            }
        }

        /* first, run 1 iteration only */
        ITERATIONS->n_sample=1;
#ifdef MPI1
        c_info->select_source = Bmark->select_source;
#endif
        Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);

        time[1] = time[0];

#ifdef MPIIO
        if( Bmark->access != no) {
            ierr = MPI_File_seek(c_info->fh, 0 ,MPI_SEEK_SET);
            MPI_ERRHAND(ierr);

            if( Bmark->fpointer == shared) {
                ierr = MPI_File_seek_shared(c_info->fh, 0 ,MPI_SEEK_SET);
                MPI_ERRHAND(ierr);
            }
        }
#endif /*MPIIO*/

        MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);

        {   /* determine rough #repetitions for a run time of 1 sec */
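            /* Heuristic: if one repetition takes time[0] seconds, roughly
               1/time[0] repetitions fill one second of run time. Illustrative
               numbers: a 10 ms iteration yields rep_test of about 100; anything
               faster than 1/MSGSPERSAMPLE seconds is simply capped at
               MSGSPERSAMPLE repetitions. */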
            int rep_test = 1;
            if (time[0] < (1.0 / MSGSPERSAMPLE)) {
                rep_test = MSGSPERSAMPLE;
            } else if (time[0] < 1.0) {
                rep_test = (int)(1.0 / time[0] + 0.5);
            }

            MPI_Allreduce(&rep_test, &acc_rep_test, 1, MPI_INT, MPI_MAX, c_info->communicator);
        }

        ITERATIONS->n_sample = min(selected_n_sample, acc_rep_test);

        if (ITERATIONS->n_sample > 1) {
#ifdef MPI1
            c_info->select_source = Bmark->select_source;
#endif
            Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);
            time[1] = time[0];
#ifdef MPIIO
            if( Bmark->access != no) {
                ierr = MPI_File_seek(c_info->fh, 0 ,MPI_SEEK_SET);
                MPI_ERRHAND(ierr);

                if ( Bmark->fpointer == shared) {
                    ierr = MPI_File_seek_shared(c_info->fh, 0 ,MPI_SEEK_SET);
                    MPI_ERRHAND(ierr);
                }
            }
#endif /*MPIIO*/

            MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);
        }

        {
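            /* Convert the requested run time ITERATIONS->secs into a sample
               count (one extra iteration plus secs / per-iteration time). The
               comparison against 0x7fffffff guards against int overflow for
               extremely fast operations; in that case the previously selected
               sample count is kept. */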
            float val = (float) (1+ITERATIONS->secs/time[0]);
            t_sample = (time[0] > 1.e-8 && (val <= (float) 0x7fffffff))
                       ? (int)val
                       : selected_n_sample;
        }

        if (c_info->n_lens>0 && BMODE->type != Sync) {
            // check monotonicity with msg sizes
            int i;
            for (i = 0; i < iter; i++) {
                t_sample = ( c_info->msglen[i] < size )
                           ? min(t_sample,ITERATIONS->numiters[i])
                           : max(t_sample,ITERATIONS->numiters[i]);
            }
            ITERATIONS->n_sample = ITERATIONS->numiters[iter] = min(selected_n_sample, t_sample);
        } else {
            ITERATIONS->n_sample = min(selected_n_sample,
                                       min(ITERATIONS->n_sample_prev, t_sample));
        }

        MPI_Bcast(&ITERATIONS->n_sample, 1, MPI_INT, 0, c_info->communicator);

#ifdef DEBUG
        {
            int usec = (int)(time[0] * 1000000);

            DBGF_I2("Checked time with #iters / usec ",acc_rep_test,usec);
            DBGF_I1("=> # samples, aligned with previous ",t_sample);
            DBGF_I1("final #samples ",ITERATIONS->n_sample);
        }
#endif
    } else { /*if( (ITERATIONS->iter_policy == imode_dynamic) || (ITERATIONS->iter_policy == imode_auto && !root_based) )*/
        double time[MAX_TIME_ID];
        Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);
    }

    ITERATIONS->n_sample_prev=ITERATIONS->n_sample;

    /* >> IMB 3.1  */

}
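
The buffer-cycling idiom used by the kernels in this file is the expression
(char*)buf + i % cache_iter * offs. A minimal sketch of that arithmetic, assuming
the fields prepared by IMB_init_buffers_iter; cycle_buffer is an illustrative
helper, not an IMB function:

#include <stddef.h>

/* Return the buffer slice to use in iteration i (hypothetical helper). */
static void* cycle_buffer(void* base, int i, int cache_iter, size_t offs)
{
    /* With -off_cache, cache_iter copies of the message lie offs bytes apart;
       iteration i picks copy i % cache_iter, so successive iterations touch
       different (cold) memory. Without off-cache mode, cache_iter == 1 and
       offs == 0, and the same buffer is reused every iteration. */
    return (char*)base + (size_t)(i % cache_iter) * offs;
}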
Example #18
0
/*
Introduce new ITERATIONS object
*/
void IMB_output(struct comm_info* c_info, struct Bench* Bmark, MODES BMODE, 
                int header, int size, struct iter_schedule* ITERATIONS,
                double *time)
/* >> IMB 3.1  */
/*



Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-Bmark                (type struct Bench*)                      
                      (For explanation of struct Bench type:
                      describes all aspects of modes of a benchmark;
                      see [1] for more information)
                      
                      The actual benchmark
                      

-BMODE                (type MODES)                      
                      The actual benchmark mode (if relevant; only MPI-2 case, see [1])
                      

-header               (type int)                      
                      1/0 for do/don't print table headers
                      

-size                 (type int)                      
                      Benchmark message size
                      

-ITERATIONS           (type struct iter_schedule *)                      
                      Benchmark repetition descr. object
                      

-time                 (type double *)                      
                      Benchmark timing outcome
                      3 numbers (min/max/average)
                      


*/
{
    double scaled_time[MAX_TIME_ID];

    int i,i_gr;
    int li_len;
    int out_format;

    const int DO_OUT    = (c_info->w_rank  == 0)   ? 1 : 0;
    const int GROUP_OUT = (c_info->group_mode > 0) ? 1 : 0;

    ierr = 0;

    if (DO_OUT)
    {
        /* Fix IMB_1.0.1: NULL all_times before allocation */
        IMB_v_free((void**)&all_times);

        all_times = (double*)IMB_v_alloc(c_info->w_num_procs * Bmark->Ntimes * sizeof(double), "Output 1");
#ifdef CHECK
      if(!all_defect)
      {
          all_defect = (double*)IMB_v_alloc(c_info->w_num_procs * sizeof(double), "Output 1");
          for(i=0; i<c_info->w_num_procs; i++) all_defect[i]=0.;
      }
#endif   
    } /*if (DO_OUT)*/

    /* Scale the timings */
    for(i=0; i < Bmark->Ntimes; i++) 
    {
        scaled_time[i] = time[i] * SCALE * Bmark->scale_time;
    }

    /* collect all times  */
    ierr=MPI_Gather(scaled_time,Bmark->Ntimes,MPI_DOUBLE,all_times,Bmark->Ntimes,MPI_DOUBLE,0,MPI_COMM_WORLD);
    MPI_ERRHAND(ierr);

#ifdef CHECK      
    /* collect all defects */     
    ierr=MPI_Gather(&defect,1,MPI_DOUBLE,all_defect,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
    MPI_ERRHAND(ierr);
#endif

    if( DO_OUT ) 
    {
        BTYPES type= Bmark->RUN_MODES[0].type;
        const int n_groups = GROUP_OUT ? c_info->n_groups : 1;
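        /* Map the benchmark class onto an output table layout: nonblocking
           benchmarks report overlap figures, single/multiple transfer
           benchmarks report time and bandwidth, message-rate benchmarks report
           bandwidth and message rate, collectives report a time range (plus
           bandwidth in the MPI-IO case), and pure synchronization benchmarks
           report times only. */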

        if ( Bmark->RUN_MODES[0].NONBLOCKING && type != Sync) 
        {
            out_format = OUT_OVERLAP;
        } 
        else if ( (type == SingleTransfer && c_info->group_mode != 0) || 
                   type == MultPassiveTransfer || 
                   type == SingleElementTransfer ) 
        {
            out_format = OUT_TIME_AND_BW;
        } 
        else if ( type == ParallelTransfer || type == SingleTransfer ) 
        {
            out_format = OUT_TIME_RANGE_AND_BW;
        } 
        else if ( type == ParallelTransferMsgRate ) 
        {
            out_format = OUT_BW_AND_MSG_RATE;
        }
        else if (type == Collective ) 
        {
#ifdef MPIIO
            out_format = OUT_TIME_RANGE_AND_BW;
#else
            out_format = OUT_TIME_RANGE;
#endif
        } 
        else 
        {
            out_format = OUT_SYNC;
        }

        if (header)
        {
            IMB_print_header (out_format, Bmark, c_info, BMODE);
        } 

        if( GROUP_OUT )
        {
            fprintf(unit,"\n");
        }

        for(i_gr = 0; i_gr < n_groups; i_gr++)
        {
            IMB_display_times(Bmark, all_times, c_info, i_gr, ITERATIONS->n_sample, size, out_format);
        } 
    } /*if( DO_OUT )*/
}
Example #19
0
void IMB_bcast(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
               MODES RUN_MODE, double* time)
/*

                      
                      MPI-1 benchmark kernel
                      Benchmarks MPI_Bcast
                      


Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-size                 (type int)                      
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)                      
                      (only MPI-2 case: see [1])


Output variables: 

-time                 (type double*)                      
                      Timing result per sample


*/
{
    double t1, t2;
    int    i,i1;
    Type_Size s_size;
    int s_num;
    void* bc_buf;

#ifdef CHECK
    defect=0.;
#endif
    ierr = 0;

    /*  GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type,&s_size);
    if (s_size!=0) s_num=size/s_size;


    if(c_info->rank!=-1)
    {
        i1=0;
        for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);


        t1 = MPI_Wtime();
        for(i=0;i< ITERATIONS->n_sample;i++)
        {
            /* Ensure s_buffer is not overwritten: only the current root broadcasts
               from s_buffer, all other ranks receive into r_buffer */
            bc_buf = (i1 == c_info->rank) ? c_info->s_buffer : c_info->r_buffer;
            ierr= MPI_Bcast((char*)bc_buf+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
                            s_num,c_info->s_data_type,
                            i1,c_info->communicator);
            MPI_ERRHAND(ierr);

            CHK_DIFF("Bcast", c_info,
                     (char*)bc_buf + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                     0, size, size, 1, put, 0, ITERATIONS->n_sample, i, i1, &defect);

            /*  CHANGE THE ROOT NODE */
            i1 = (i1 + 1) % c_info->num_procs;
        }
        t2 = MPI_Wtime();
        *time=(t2 - t1)/(ITERATIONS->n_sample);
    }
    else
    {
        *time = 0.;
    }
}
Example #20
0
void IMB_gatherv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                 MODES RUN_MODE, double* time)
/*

                      
                      MPI-1 benchmark kernel
                      Benchmarks MPI_Gatherv
                      


Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-size                 (type int)                      
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)                      
                      (only MPI-2 case: see [1])


Output variables: 

-time                 (type double*)                      
                      Timing result per sample


*/
{
  double t1, t2;
  int    i;
  Type_Size s_size,r_size;
  int s_num, r_num;

#ifdef CHECK
defect=0.;
#endif
  ierr = 0;

  /*  GET SIZE OF DATA TYPE */  
  MPI_Type_size(c_info->s_data_type,&s_size);
  MPI_Type_size(c_info->r_data_type,&r_size);
  if ((s_size!=0) && (r_size!=0))
  {
      s_num=size/s_size;
      r_num=size/r_size;
  } 

  /* INITIALIZATION OF DISPLACEMENT and RECEIVE COUNTS */
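  /* Every rank contributes s_num elements; the root receives r_num elements
     from rank i at displacement r_num*i, so r_buffer must hold
     c_info->num_procs * r_num elements. */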

  for (i=0;i<c_info->num_procs ;i++)
  {
      c_info->rdispl[i] = r_num*i;
      c_info->reccnt[i] = r_num;
  }

    *time = 0.;
    if(c_info->rank!=-1)
    {
        int root = 0;
        IMB_do_n_barriers(c_info->communicator, N_BARR);

        for(i=0;i<ITERATIONS->n_sample;i++)
        {
            t1 = MPI_Wtime();
            ierr = MPI_Gatherv((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
                                s_num,c_info->s_data_type,
                                (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
                                c_info->reccnt,c_info->rdispl,
                                c_info->r_data_type,
                                root,
                                c_info->communicator);
            MPI_ERRHAND(ierr);
            t2 = MPI_Wtime();
            *time += (t2 - t1);
#ifdef CHECK
            if( c_info->rank == root )
            {
                 CHK_DIFF("Gatherv",c_info, 
                          (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0,
                          0, (size_t) c_info->num_procs * (size_t) size, 1, 
                          put, 0, ITERATIONS->n_sample, i,
                          -2, &defect);
            }
#endif
            root = (root + c_info->root_shift) % c_info->num_procs;
            IMB_do_n_barriers(c_info->communicator, c_info->sync);
        }
        *time /= ITERATIONS->n_sample;
    }
}
void IMB_allreduce(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                   MODES RUN_MODE, double* time)
/*

                      
                      MPI-1 benchmark kernel
                      Benchmarks MPI_Allreduce
                      


Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-size                 (type int)                      
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling


-RUN_MODE             (type MODES)                      
                      (only MPI-2 case: see [1])


Output variables: 

-time                 (type double*)                      
                      Timing result per sample


*/
{
  double t1, t2;
  int    i;

  Type_Size s_size;
  int s_num;
  
#ifdef CHECK
  defect=0.;
#endif
  ierr = 0;

  *time = 0.;

  /*  GET SIZE OF DATA TYPE */  
  MPI_Type_size(c_info->red_data_type,&s_size);
  if (s_size!=0) s_num=size/s_size;
  
  if(c_info->rank!=-1)
  {
      IMB_do_n_barriers (c_info->communicator, N_BARR);

      for(i=0;i< ITERATIONS->n_sample;i++)
      {
          t1 = MPI_Wtime();
          ierr = MPI_Allreduce((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
                               (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
                               s_num,
                               c_info->red_data_type,c_info->op_type,
                               c_info->communicator);
          MPI_ERRHAND(ierr);
          t2 = MPI_Wtime();
          *time += (t2 - t1);

          CHK_DIFF("Allreduce",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0,
                   size, size, asize, 
                   put, 0, ITERATIONS->n_sample, i,
                   -1, &defect);
          
          IMB_do_n_barriers (c_info->communicator, c_info->sync);

      }
      *time /= ITERATIONS->n_sample;
  }
}
Example #22
0
void IMB_shr_spmd_swapm(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                    MODES RUN_MODE, double* time)
/*

                      
                      MPI-1 benchmark kernel
                      Benchmarks shr_spmd_swapm
                      


Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-size                 (type int)                      
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)                      
                      (only MPI-2 case: see [1])


Output variables: 

-time                 (type double*)                      
                      Timing result per sample


*/
{
    double 	t1, t2;
    int    	i;
    Type_Size 	s_size,r_size;
    int 	s_num, r_num;
    MPI_Datatype s_data_types[c_info->num_procs];
    MPI_Datatype r_data_types[c_info->num_procs];

    static bool handshake, isend, firstpass=true;
    static int maxreqs;

    int flow_cntl;
#ifdef CHECK
    defect=0.;
#endif
    ierr = 0;

    if(firstpass){
      firstpass = false;
      shr_swapm_getenv(&handshake, &isend, &maxreqs);
    }
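    /* Translate maxreqs into a flow-control window: a value >= 0 caps the
       number of outstanding requests directly (never more than num_procs);
       a negative value requests num_procs/|maxreqs| outstanding requests,
       with a floor of 2. */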
    if(maxreqs >= 0){
      flow_cntl = min(c_info->num_procs, maxreqs);
    }else{
      flow_cntl = max(2,-1* c_info->num_procs/maxreqs);
    }
    if(c_info->rank==0){
      printf("SPMD_SWAPM: handshake %d isend %d flow_cntl = %d\n",handshake, isend,flow_cntl );
    }

    /*  GET SIZE OF DATA TYPE */  
    MPI_Type_size(c_info->s_data_type,&s_size);
    MPI_Type_size(c_info->r_data_type,&r_size);

    if ((s_size!=0) && (r_size!=0))
    {
	s_num=size/s_size;
	r_num=size/r_size;
    } 

    /* INITIALIZATION OF DISPLACEMENT and SEND/RECEIVE COUNTS */
    for (i=0;i<c_info->num_procs ;i++)
      {
	s_data_types[i] = c_info->s_data_type;
	r_data_types[i] = c_info->r_data_type;

	c_info->sdispl[i] = s_num*i;
	c_info->sndcnt[i] = s_num;
	c_info->rdispl[i] = r_num*i;
	c_info->reccnt[i] = r_num;
    }

    if(c_info->rank!=-1)
    {

	for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);

	t1 = MPI_Wtime();
	for(i=0;i< ITERATIONS->n_sample;i++)
	{
	    ierr = shr_spmd_swapm((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
				 c_info->sndcnt,c_info->sdispl,
				 s_data_types,
				 (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
				 c_info->reccnt,c_info->rdispl,
				 r_data_types,
				  c_info->communicator, handshake, isend, flow_cntl);
	    MPI_ERRHAND(ierr);

	    CHK_DIFF("Alltoallw",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
		     (size_t) c_info->rank* (size_t) size, 0, 
		     (size_t) c_info->num_procs* (size_t) size, 1, 
		     put, 0, ITERATIONS->n_sample, i,
		     -2, &defect);
	}

	t2 = MPI_Wtime();
	*time=(t2 - t1)/ITERATIONS->n_sample;
    }
    else
    { 
	*time = 0.; 
    }
}
Example #23
0
void IMB_reduce_scatter(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                        MODES RUN_MODE, double* time)
/*

                      
                      MPI-1 benchmark kernel
                      Benchmarks MPI_Reduce_scatter
                      


Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-size                 (type int)                      
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)                      
                      (only MPI-2 case: see [1])


Output variables: 

-time                 (type double*)                      
                      Timing result per sample


*/
{
    double t1, t2;
    int    i;
    size_t pos1,pos2;
#ifdef CHECK
    size_t pos;
    int    Locsize;
#endif

    Type_Size s_size;
  
#ifdef CHECK
    defect=0.;
#endif
    ierr = 0;

    /*  GET SIZE OF DATA TYPE */  
    MPI_Type_size(c_info->red_data_type,&s_size);
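    /* Partition the size bytes of the reduction result across the ranks:
       IMB_get_rank_portion returns the byte range [pos1, pos2] owned by
       rank i, converted here into the element count reccnt[i]; with CHECK
       enabled, the calling rank also remembers its own offset and local
       size for result verification. */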

    for (i=0;i<c_info->num_procs ;i++)
    {
	if( size > 0)
	{
	    IMB_get_rank_portion(i, c_info->num_procs, size, s_size, 
				 &pos1, &pos2);
	    c_info->reccnt[i] = (pos2-pos1+1)/s_size;
    #ifdef CHECK
	    if( i==c_info->rank ) {pos=pos1; Locsize= s_size*c_info->reccnt[i];}
    #endif
	} else
	{
	    c_info->reccnt[i] = 0;
    #ifdef CHECK
	    if( i==c_info->rank ) {pos=0; Locsize= 0;}
    #endif
	}
    }

    if(c_info->rank!=-1)
    {
	for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);

	t1 = MPI_Wtime();
	for(i=0;i< ITERATIONS->n_sample;i++)
	{
	    ierr = MPI_Reduce_scatter ((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
				       (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
				       c_info->reccnt,
				       c_info->red_data_type,c_info->op_type,
				       c_info->communicator);
	    MPI_ERRHAND(ierr);

	    CHK_DIFF("Reduce_scatter",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
		     pos,
		     Locsize, size, asize,
		     put, 0, ITERATIONS->n_sample, i,
		     -1, &defect);

	}
	t2 = MPI_Wtime();
	*time=(t2 - t1)/ITERATIONS->n_sample;
    } else /*if(c_info->rank==-1)*/
    { 
	*time = 0.; 
    }
}
Example #24
0
void IMB_iallgatherv(struct comm_info* c_info,
                     int size,
                     struct iter_schedule* ITERATIONS,
                     MODES RUN_MODE,
                     double* time)
/*


                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Iallgatherv



Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information


-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])


Output variables:

-time                 (type double*)
                      Timing result per sample


*/
{
    int         i = 0;
    Type_Size   s_size,
                r_size;
    int         s_num = 0,
                r_num = 0;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0.,
                t_comp = 0.,
                t_ovrlp = 0.;

#ifdef CHECK
    defect=0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type, &s_size);
    MPI_Type_size(c_info->r_data_type, &r_size);
    if ((s_size != 0) && (r_size != 0)) {
        s_num = size / s_size;
        r_num = size / r_size;
    }

    if(c_info->rank != -1) {
        /* GET PURE TIME. DISPLACEMENT AND RECEIVE COUNT WILL BE INITIALIZED HERE */
        IMB_iallgatherv_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);

        /* INITIALIZATION CALL */
        IMB_cpu_exploit(t_pure, 1);
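        /* Overlap measurement scheme: t_pure is the duration of the
           nonblocking collective when it is immediately waited for;
           IMB_cpu_exploit(t_pure, 1) calibrates a CPU kernel to run for
           roughly that long. In the loop below each Iallgatherv is overlapped
           with that kernel; t_ovrlp records the total time per iteration and
           t_comp the time spent in the CPU kernel, and the output stage
           derives the overlap percentage from these three values. */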

        for(i=0; i<N_BARR; i++) {
            MPI_Barrier(c_info->communicator);
        }

        t_ovrlp = MPI_Wtime();
        for(i=0; i < ITERATIONS->n_sample; i++)
        {
            ierr = MPI_Iallgatherv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                   s_num,
                                   c_info->s_data_type,
                                   (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                   c_info->reccnt,
                                   c_info->rdispl,
                                   c_info->r_data_type,
                                   c_info->communicator,
                                   &request);

            MPI_ERRHAND(ierr);
            t_comp -= MPI_Wtime();
            IMB_cpu_exploit(t_pure, 0);
            t_comp += MPI_Wtime();
            MPI_Wait(&request, &status);
            CHK_DIFF("Iallgatherv", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     0, 0, ((size_t)c_info->num_procs * (size_t)size),
                     1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
        }
        t_ovrlp = (MPI_Wtime() - t_ovrlp) / ITERATIONS->n_sample;
        t_comp /= ITERATIONS->n_sample;
    }

    time[0] = t_pure;
    time[1] = t_ovrlp;
    time[2] = t_comp;
}
Example #25
0
void IMB_ialltoall_pure(struct comm_info* c_info,
                        int size,
                        struct iter_schedule* ITERATIONS,
                        MODES RUN_MODE,
                        double* time)
/*


                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Ialltoall.



Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information


-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])


Output variables:

-time                 (type double*)
                      Timing result per sample


*/
{
    int         i = 0;
    Type_Size   s_size,
                r_size;
    int         s_num = 0,
                r_num = 0;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0.;

#ifdef CHECK
    defect=0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type, &s_size);
    MPI_Type_size(c_info->r_data_type, &r_size);
    if ((s_size != 0) && (r_size != 0)) {
        s_num = size / s_size;
        r_num = size / r_size;
    }

    if(c_info->rank != -1) {
        for (i = 0; i < N_BARR; i++) {
            MPI_Barrier(c_info->communicator);
        }

        t_pure = MPI_Wtime();
        for(i = 0; i < ITERATIONS->n_sample; i++)
        {
            ierr = MPI_Ialltoall((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                 s_num,
                                 c_info->s_data_type,
                                 (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                 r_num,
                                 c_info->r_data_type,
                                 c_info->communicator,
                                 &request);
            MPI_ERRHAND(ierr);
            MPI_Wait(&request, &status);
            CHK_DIFF("Ialltoall_pure", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     ((size_t)c_info->rank * (size_t) size), 0, ((size_t)c_info->num_procs * (size_t)size),
                     1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
        }
        t_pure = (MPI_Wtime() - t_pure) / ITERATIONS->n_sample;
    }

    time[0] = t_pure;
}
Example #26
0
void IMB_ireduce_scatter(struct comm_info* c_info,
                         int size,
                         struct iter_schedule* ITERATIONS,
                         MODES RUN_MODE,
                         double* time)
{
    int         i = 0;
    Type_Size   s_size;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0.,
                t_comp = 0.,
                t_ovrlp = 0.;

#ifdef CHECK
    size_t      pos     = 0,
                pos1    = 0,
                pos2    = 0;
    int         Locsize = 0;

    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->red_data_type, &s_size);

#ifdef CHECK
    if(size > 0) {
        for (i = 0; i < c_info->num_procs; i++) {
            IMB_get_rank_portion(i, c_info->num_procs, size, s_size, &pos1, &pos2);
            if (i == c_info->rank) {
                pos = pos1;
                Locsize = s_size * c_info->reccnt[i];
            }
        }
    }
#endif // CHECK

    if(c_info->rank != -1) {
        IMB_ireduce_scatter_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);

        /* INITIALIZATION CALL */
        IMB_cpu_exploit(t_pure, 1);

        for(i=0; i < N_BARR; i++) {
            MPI_Barrier(c_info->communicator);
        }

        t_ovrlp = MPI_Wtime();
        for(i = 0; i < ITERATIONS->n_sample; i++) {
            ierr = MPI_Ireduce_scatter((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                       (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                       c_info->reccnt,
                                       c_info->red_data_type,
                                       c_info->op_type,
                                       c_info->communicator,
                                       &request);
            MPI_ERRHAND(ierr);
            t_comp -= MPI_Wtime();
            IMB_cpu_exploit(t_pure, 0);
            t_comp += MPI_Wtime();
            MPI_Wait(&request, &status);
            CHK_DIFF("Ireduce_scatter", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     pos, Locsize, size, asize, put, 0, ITERATIONS->n_sample, i, -1, &defect);
        }
        t_ovrlp = (MPI_Wtime() - t_ovrlp) / ITERATIONS->n_sample;
        t_comp /= ITERATIONS->n_sample;
    }

    time[0] = t_pure;
    time[1] = t_ovrlp;
    time[2] = t_comp;
}
void IMB_pingping(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                  MODES RUN_MODE, double* time)
/*

                      
                      MPI-1 benchmark kernel
                      2 process exchange; MPI_Isend + MPI_Recv 
                      


Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-size                 (type int)                      
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)                      
                      (only MPI-2 case: see [1])


Output variables: 

-time                 (type double*)                      
                      Timing result per sample


*/
{
  double t1,t2;
  int    i;
  
  Type_Size s_size, r_size;
  int s_num, r_num;
  int s_tag, r_tag;
  int dest, source;
  MPI_Status stat;
  MPI_Request request;

#ifdef CHECK 
  defect=0;
#endif
  ierr = 0;

  MPI_Type_size(c_info->s_data_type,&s_size);
  MPI_Type_size(c_info->r_data_type,&r_size);

  if ((s_size!=0) && (r_size!=0))
    {
      s_num=size/s_size;
      r_num=size/r_size;
    }
  s_tag = 1;
  r_tag = c_info->select_tag ? s_tag : MPI_ANY_TAG;
  
  dest = -1;
  if (c_info->rank == c_info->pair0)
      dest = c_info->pair1;
  else if (c_info->rank == c_info->pair1)
      dest = c_info->pair0;

  source = c_info->select_source ? dest : MPI_ANY_SOURCE;
      
  if( dest != -1 )
    {
      for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);

      t1 = MPI_Wtime();
      for(i=0;i< ITERATIONS->n_sample;i++)
	{
	  ierr= MPI_Isend((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
                          s_num,
			  c_info->s_data_type,dest,s_tag,
			  c_info->communicator,&request);
	  MPI_ERRHAND(ierr);
	  ierr = MPI_Recv((char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
                          r_num,c_info->r_data_type,source,
			  r_tag,c_info->communicator,&stat);
	  MPI_ERRHAND(ierr);

	  ierr = MPI_Wait(&request, &stat);
	  MPI_ERRHAND(ierr);

          CHK_DIFF("PingPing",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
                    0, size, size, asize,
                    put, 0, ITERATIONS->n_sample, i,
                    dest, &defect);
	}
      t2 = MPI_Wtime();
      
      *time=(t2 - t1)/ITERATIONS->n_sample;
    }
  else
    { 
      *time = 0.; 
    }
}
Example #28
0
void IMB_ireduce_scatter_pure(struct comm_info* c_info,
                              int size,
                              struct iter_schedule* ITERATIONS,
                              MODES RUN_MODE,
                              double* time)
{
    int         i = 0;
    Type_Size   s_size;
    size_t      pos1 = 0,
                pos2 = 0;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0.;

#ifdef CHECK
    size_t      pos = 0;
    int         Locsize = 0;

    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->red_data_type, &s_size);

    for (i = 0; i < c_info->num_procs; i++) {
        if( size > 0) {
            IMB_get_rank_portion(i, c_info->num_procs, size, s_size, &pos1, &pos2);
            c_info->reccnt[i] = (pos2 - pos1 + 1) / s_size;
#ifdef CHECK
            if (i == c_info->rank) {
                pos = pos1;
                Locsize = s_size * c_info->reccnt[i];
            }
#endif
        } else {
            c_info->reccnt[i] = 0;
#ifdef CHECK
            if (i == c_info->rank) {
                pos=0;
                Locsize = 0;
            }
#endif
        }
    }

    if(c_info->rank != -1) {
        for (i = 0; i < N_BARR; i++) {
            MPI_Barrier(c_info->communicator);
        }

        t_pure = MPI_Wtime();
        for(i = 0; i < ITERATIONS->n_sample; i++) {
            ierr = MPI_Ireduce_scatter((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                       (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                       c_info->reccnt,
                                       c_info->red_data_type,
                                       c_info->op_type,
                                       c_info->communicator,
                                       &request);
            MPI_ERRHAND(ierr);
            MPI_Wait(&request, &status);
            CHK_DIFF("Ireduce_scatter_pure", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     pos, Locsize, size, asize, put, 0, ITERATIONS->n_sample, i, -1, &defect);
        }
        t_pure = (MPI_Wtime() - t_pure) / ITERATIONS->n_sample;
    }

    time[0] = t_pure;
}
Example #29
0
void Output(struct comm_info* c_info,struct Bench* Bmark,MODES BMODE,
	    int tmp_NP, int header,int size,int n_sample,double *time)
/*****************************************************************/

/*-----------------------------------------------------------------
             VARIABLE |       TYPE        |   MEANING
-------------------------------------------------------------------
Input      :  c_info  | struct comm_info* | see comm_info.h 
              Bmark   | struct Bench*     | current benchmark
              tmp_NP  | int               | number of nodes
	      header  | int               | first call or not (header flag)
              size    | int               | message length in bytes
	      n_sample| int               | repetition count
	      time    | double*           | time of measurement
	              |                   |
Output     :          |                   |
                      |                   |
In/Out     :  -       | -                 | -
                      |                   |  
-------------------------------------------------------------------
-------------------------------------------------------------------
Description: Output of results (header and bare data of measurement)
-----------------------------------------------------------------*/
{
  double scaled_time[MAX_TIMINGS];
  
  int DO_OUT;
  int GROUP_OUT;
  int i,i_gr;
  int li_len;
  int edit_type;
  
  ierr = 0;

  DO_OUT    = (c_info->w_rank  == 0 );
  GROUP_OUT = (c_info->group_mode > 0 );

  if (DO_OUT) 
    {
      if(!all_times)
	{
          all_times = 
  (double*)v_alloc(c_info->w_num_procs * Bmark->Ntimes * sizeof(double), 
                  "Output 1");
	}
#ifdef CHECK
      if(!all_defect)
	{
          all_defect = (double*)v_alloc(c_info->w_num_procs * sizeof(double), 
                  "Output 1");
          for(i=0; i<c_info->w_num_procs; i++) all_defect[i]=0.;
	}
#endif  	  
    }

/* Scale the timings */
  for(i=0; i<Bmark->Ntimes;  i++)
  scaled_time[i] = time[i] * SCALE * Bmark->scale_time;


/* collect all times  */
  ierr=MPI_Gather(scaled_time,Bmark->Ntimes,MPI_DOUBLE,all_times,Bmark->Ntimes,MPI_DOUBLE,0,MPI_COMM_WORLD);
  MPI_ERRHAND(ierr);

#ifdef CHECK      
/* collect all defects */	      
  ierr=MPI_Gather(&defect,1,MPI_DOUBLE,all_defect,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
  MPI_ERRHAND(ierr);

#endif
  if( DO_OUT )
    {
      BTYPES type= Bmark->RUN_MODES[0].type;
      if ( Bmark->RUN_MODES[0].NONBLOCKING )
           edit_type = 4;
      else if ( type == SingleTransfer && c_info->group_mode != 0 )
           edit_type=0;
      else if ( type == ParallelTransfer || type == SingleTransfer )
           edit_type=1;
      else if (type == Collective )
#ifdef MPIIO
           edit_type=1;
#else
           edit_type=2;
#endif
      else 
           edit_type=3;

      if( header )
	{
        fprintf(unit,"\n");            /* FOR GNUPLOT: CURVE SEPARATOR  */


          if( GROUP_OUT ) {strcpy(aux_string,"&Group") ; li_len=1;}
          else            {strcpy(aux_string,"");  li_len=0;}
	  if ( edit_type == 0 )
	    {
	      li_len+=4;
	      strcat(aux_string,"&#bytes&#repetitions&t[usec]&Mbytes/sec&");
	    }
	  else if ( edit_type == 1 )
	    {
	      li_len+=6;
	      strcat(aux_string,
		     "&#bytes&#repetitions&t_min[usec]&t_max[usec]&t_avg[usec]&Mbytes/sec&");
	    }
	  else if ( edit_type == 2 )
	    {
	      li_len+=5;
	      strcat(aux_string,
		     "&#bytes&#repetitions&t_min[usec]&t_max[usec]&t_avg[usec]&");
	    }
	  else if ( edit_type == 3 )
	    {
	      li_len+=4;
	      strcat(aux_string,
		     "&#repetitions&t_min[usec]&t_max[usec]&t_avg[usec]&");
	    }
          else
            {
	      li_len+=6;
	      strcat(aux_string,
		     "&#bytes&#repetitions&t_ovrl[usec]&t_pure[usec]&t_CPU[usec]& overlap[%]&");
            }
#ifdef CHECK
          if( Bmark->RUN_MODES[0].type != Sync &&
              strcmp(Bmark->name,"Window") )
          {
	  li_len+=1;
	  strcat(aux_string,"&defects&");
          }
#endif
        Make_Line(li_len);
        if( c_info->n_groups > 1) 
        fprintf(unit,"# Benchmarking Multi-%s ",Bmark->name);
        else
        fprintf(unit,"# Benchmarking %s ",Bmark->name);
        Show_Procids(c_info); 

        Make_Line(li_len);

        switch(BMODE->AGGREGATE)
          {
          case 1:
          fprintf(unit,"#\n#    MODE: AGGREGATE \n#\n");
          break;
          case 0:
          fprintf(unit,"#\n#    MODE: NON-AGGREGATE \n#\n");
          break;
          }
	  Print_Headlines(c_info,tmp_NP,Bmark->name,li_len,aux_string);
	}     



      if( GROUP_OUT )
      {


      for( i_gr=0; i_gr<c_info->n_groups; i_gr++ )
	{
	  if(i_gr == 0) fprintf(unit,"\n");

    	  Display_Times(Bmark, all_times, c_info, i_gr, n_sample, size, edit_type);
	} 
      }
      else
    	  Display_Times(Bmark, all_times, c_info,  0, n_sample, size, edit_type);
    } /*if( DO_OUT )*/
}
Example #30
0
void IMB_sendrecv(struct comm_info* c_info, int size,  struct iter_schedule* ITERATIONS,
                  MODES RUN_MODE, double* time)
/*

                      
                      MPI-1 benchmark kernel
                      Benchmarks MPI_Sendrecv
                      


Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-size                 (type int)                      
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)                      
                      (only MPI-2 case: see [1])


Output variables: 

-time                 (type double*)                      
                      Timing result per sample


*/
{
  double t1,t2;
  int i;
  Type_Size s_size, r_size;
  int s_num,r_num;
  int s_tag, r_tag;
  int dest, source;
  MPI_Status stat;

#ifdef CHECK 
  defect=0;
#endif
  ierr = 0;

  /*  GET SIZE OF DATA TYPE's in s_size and r_size */  
  MPI_Type_size(c_info->s_data_type,&s_size);
  MPI_Type_size(c_info->r_data_type,&r_size);
  if ((s_size!=0) && (r_size!=0))
    {
      s_num=size/s_size;
      r_num=size/r_size;
    }   
  s_tag = 1;
  r_tag = MPI_ANY_TAG;
  
  if(c_info->rank!=-1)
    {  
      /*  CALCULATE SOURCE AND DESTINATION */  
      dest   = (c_info->rank + 1)                   % (c_info->num_procs);
      source = (c_info->rank + c_info->num_procs-1) % (c_info->num_procs);
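      /* Chain communication in a ring: e.g. with 4 processes, rank 2 sends to
         rank 3 and receives from rank 1, so every rank is simultaneously a
         sender and a receiver in each iteration. */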

      for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
      
      t1 = MPI_Wtime();
      for(i=0;i< ITERATIONS->n_sample;i++)
	{
	  ierr= MPI_Sendrecv((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
                             s_num,c_info->s_data_type, dest,s_tag,
                             (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
                             r_num,c_info->r_data_type,source,r_tag,
			     c_info->communicator,&stat);
	  MPI_ERRHAND(ierr);

          CHK_DIFF("Sendrecv",c_info,(char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
                    0, size, size, asize,
                    put, 0, ITERATIONS->n_sample, i,
                    source, &defect);
	}
      t2 = MPI_Wtime();
      *time=(t2 - t1)/ITERATIONS->n_sample;
    }
  else
    { 
      *time = 0.;
    }
}