void IMB_iallreduce_pure(struct comm_info* c_info,
                         int size,
                         struct iter_schedule* ITERATIONS,
                         MODES RUN_MODE,
                         double* time)
/*
                      MPI-NBC benchmark kernel
                      Times MPI_Iallreduce directly followed by MPI_Wait
                      (nothing is executed between the two calls).

Input variables:

-c_info               (type struct comm_info*)
                      Supplies communicator, rank, send/receive buffers,
                      reduction datatype and operation

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Supplies n_sample and the buffer cache offsets

-RUN_MODE             (type MODES)
                      Not referenced by this kernel

Output variables:

-time                 (type double*)
                      time[0] = accumulated Iallreduce+Wait time divided by
                      n_sample; 0 when c_info->rank == -1
*/
{
    Type_Size   elem_size;
    int         elem_count = 0;
    int         iter;
    MPI_Request req;
    MPI_Status  wait_status;
    double      elapsed = 0.;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* Element count = bytes / bytes-per-element (stays 0 for a zero-size type) */
    MPI_Type_size(c_info->red_data_type, &elem_size);
    if (elem_size != 0)
        elem_count = size / elem_size;

    /* Processes with rank -1 do not take part in the measurement */
    if (c_info->rank == -1) {
        time[0] = elapsed;
        return;
    }

    IMB_do_n_barriers(c_info->communicator, N_BARR);

    for (iter = 0; iter < ITERATIONS->n_sample; ++iter) {
        /* Only the Iallreduce/Wait pair is timed; the check and the
           per-iteration barriers below fall outside the timed window */
        elapsed -= MPI_Wtime();
        ierr = MPI_Iallreduce((char*)c_info->s_buffer + (iter % ITERATIONS->s_cache_iter) * ITERATIONS->s_offs,
                              (char*)c_info->r_buffer + (iter % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs,
                              elem_count,
                              c_info->red_data_type,
                              c_info->op_type,
                              c_info->communicator,
                              &req);
        MPI_ERRHAND(ierr);
        MPI_Wait(&req, &wait_status);
        elapsed += MPI_Wtime();

        CHK_DIFF("Iallreduce_pure", c_info,
                 (char*)c_info->r_buffer + (iter % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs,
                 0, size, size, asize, put, 0, ITERATIONS->n_sample, iter, -1, &defect);

        IMB_do_n_barriers(c_info->communicator, c_info->sync);
    }

    time[0] = elapsed / ITERATIONS->n_sample;
}
void IMB_reduce_scatter(struct comm_info* c_info,
                        int size,
                        struct iter_schedule* ITERATIONS,
                        MODES RUN_MODE,
                        double* time)
/*
                      MPI-1 benchmark kernel
                      Benchmarks MPI_Reduce_scatter

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information.
                      c_info->reccnt[] is overwritten here.

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])

Output variables:

-time                 (type double*)
                      Timing result per sample
                      (whole loop time / n_sample; 0 when rank == -1)
*/
{
    Type_Size elem_size;
    size_t    first, last;
    double    t_start, t_stop;
    int       p, iter;
#ifdef CHECK
    size_t    pos;
    int       Locsize;

    defect = 0.;
#endif
    ierr = 0;

    MPI_Type_size(c_info->red_data_type, &elem_size);

    /* Fill the per-rank receive counts from each rank's byte portion */
    for (p = 0; p < c_info->num_procs; ++p) {
        if (size <= 0) {
            c_info->reccnt[p] = 0;
#ifdef CHECK
            if (p == c_info->rank) {
                pos = 0;
                Locsize = 0;
            }
#endif
            continue;
        }

        IMB_get_rank_portion(p, c_info->num_procs, size, elem_size, &first, &last);
        c_info->reccnt[p] = (last - first + 1) / elem_size;
#ifdef CHECK
        if (p == c_info->rank) {
            pos = first;
            Locsize = elem_size * c_info->reccnt[p];
        }
#endif
    }

    if (c_info->rank == -1) {
        *time = 0.;
        return;
    }

    for (iter = 0; iter < N_BARR; ++iter)
        MPI_Barrier(c_info->communicator);

    t_start = MPI_Wtime();
    for (iter = 0; iter < ITERATIONS->n_sample; ++iter) {
        char* send_ptr = (char*)c_info->s_buffer + (iter % ITERATIONS->s_cache_iter) * ITERATIONS->s_offs;
        char* recv_ptr = (char*)c_info->r_buffer + (iter % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs;

        ierr = MPI_Reduce_scatter(send_ptr, recv_ptr,
                                  c_info->reccnt,
                                  c_info->red_data_type, c_info->op_type,
                                  c_info->communicator);
        MPI_ERRHAND(ierr);

        CHK_DIFF("Reduce_scatter", c_info, recv_ptr,
                 pos, Locsize, size, asize, put, 0, ITERATIONS->n_sample, iter, -1, &defect);
    }
    t_stop = MPI_Wtime();

    *time = (t_stop - t_start) / ITERATIONS->n_sample;
}
void IMB_igatherv_pure(struct comm_info* c_info,
                       int size,
                       struct iter_schedule* ITERATIONS,
                       MODES RUN_MODE,
                       double* time)
/*
                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Igatherv directly followed by MPI_Wait

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information.
                      c_info->rdispl[] and c_info->reccnt[] are overwritten here.

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])

Output variables:

-time                 (type double*)
                      time[0] = whole loop time / n_sample; 0 when rank == -1
*/
{
    int         i = 0;
    Type_Size   s_size, r_size;
    /* Both counts start at 0 so the tables below are well defined even if a
       datatype reports size 0 (r_num was previously read uninitialized). */
    int         s_num = 0, r_num = 0;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0.;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE.
       The receive element size must come from r_data_type, which is the type
       handed to MPI_Igatherv for the receive side. */
    MPI_Type_size(c_info->s_data_type, &s_size);
    MPI_Type_size(c_info->r_data_type, &r_size);
    if ((s_size != 0) && (r_size != 0)) {
        s_num = size / s_size;
        r_num = size / r_size;
    }

    /* INITIALIZATION OF DISPLACEMENT and RECEIVE COUNTS */
    for (i = 0; i < c_info->num_procs; ++i) {
        c_info->rdispl[i] = r_num * i;
        c_info->reccnt[i] = r_num;
    }

    if (c_info->rank != -1) {
        for (i = 0; i < N_BARR; i++) {
            MPI_Barrier(c_info->communicator);
        }

        t_pure = MPI_Wtime();
        for (i = 0; i < ITERATIONS->n_sample; i++) {
            ierr = MPI_Igatherv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                s_num,
                                c_info->s_data_type,
                                (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                c_info->reccnt,
                                c_info->rdispl,
                                c_info->r_data_type,
                                i % c_info->num_procs, /* root = round robin */
                                c_info->communicator,
                                &request);
            MPI_ERRHAND(ierr);
            MPI_Wait(&request, &status);
#ifdef CHECK
            /* Only the current root holds gathered data to verify */
            if (c_info->rank == i % c_info->num_procs) {
                CHK_DIFF("Igatherv_pure", c_info,
                         (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                         0, 0, ((size_t)c_info->num_procs * (size_t)size),
                         1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
            }
#endif /* CHECK */
        }
        t_pure = (MPI_Wtime() - t_pure) / ITERATIONS->n_sample;
    }

    time[0] = t_pure;
}
void IMB_allreduce(struct comm_info* c_info,
                   int size,
                   struct iter_schedule* ITERATIONS,
                   MODES RUN_MODE,
                   double* time)
/*
                      MPI-1 benchmark kernel
                      Benchmarks MPI_Allreduce

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])

Output variables:

-time                 (type double*)
                      Timing result per sample: sum of the individual
                      MPI_Allreduce call times / n_sample (0 when rank == -1)
*/
{
    double    t1, t2;
    int       i;
    Type_Size s_size;
    int       s_num = 0; /* stays 0 if the datatype reports size 0 (was uninitialized) */

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    *time = 0.;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->red_data_type, &s_size);
    if (s_size != 0)
        s_num = size / s_size;

    if (c_info->rank != -1) {
        IMB_do_n_barriers(c_info->communicator, N_BARR);

        for (i = 0; i < ITERATIONS->n_sample; i++) {
            /* Only the collective itself is timed; the check and the
               per-iteration barriers are outside the t1..t2 window */
            t1 = MPI_Wtime();
            ierr = MPI_Allreduce((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                 (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                 s_num,
                                 c_info->red_data_type, c_info->op_type,
                                 c_info->communicator);
            MPI_ERRHAND(ierr);
            t2 = MPI_Wtime();
            *time += (t2 - t1);

            CHK_DIFF("Allreduce", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     0, size, size, asize, put, 0, ITERATIONS->n_sample, i, -1, &defect);

            IMB_do_n_barriers(c_info->communicator, c_info->sync);
        }
        *time /= ITERATIONS->n_sample;
    }
}
void IMB_sendrecv(struct comm_info* c_info,
                  int size,
                  struct iter_schedule* ITERATIONS,
                  MODES RUN_MODE,
                  double* time)
/*
                      MPI-1 benchmark kernel
                      Benchmarks MPI_Sendrecv

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])

Output variables:

-time                 (type double*)
                      Timing result per sample
                      (whole loop time / n_sample; 0 when rank == -1)
*/
{
    double     t1, t2;
    int        i;
    Type_Size  s_size, r_size;
    /* Counts default to 0 so they are never read uninitialized when a
       datatype reports size 0 */
    int        s_num = 0, r_num = 0;
    int        s_tag, r_tag;
    int        dest, source;
    MPI_Status stat;

#ifdef CHECK
    defect = 0;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE's in s_size and r_size */
    MPI_Type_size(c_info->s_data_type, &s_size);
    MPI_Type_size(c_info->r_data_type, &r_size);
    if ((s_size != 0) && (r_size != 0)) {
        s_num = size / s_size;
        r_num = size / r_size;
    }

    s_tag = 1;
    r_tag = MPI_ANY_TAG;

    if (c_info->rank != -1) {
        /* CALCULATE SOURCE AND DESTINATION: periodic chain, send to the
           next rank and receive from the previous one */
        dest   = (c_info->rank + 1) % (c_info->num_procs);
        source = (c_info->rank + c_info->num_procs - 1) % (c_info->num_procs);

        for (i = 0; i < N_BARR; i++)
            MPI_Barrier(c_info->communicator);

        t1 = MPI_Wtime();
        for (i = 0; i < ITERATIONS->n_sample; i++) {
            ierr = MPI_Sendrecv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                s_num, c_info->s_data_type,
                                dest, s_tag,
                                (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                r_num, c_info->r_data_type,
                                source, r_tag,
                                c_info->communicator, &stat);
            MPI_ERRHAND(ierr);

            CHK_DIFF("Sendrecv", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     0, size, size, asize, put, 0, ITERATIONS->n_sample, i, source, &defect);
        }
        t2 = MPI_Wtime();
        *time = (t2 - t1) / ITERATIONS->n_sample;
    } else {
        *time = 0.;
    }
}
void IMB_bcast(struct comm_info* c_info,
               int size,
               struct iter_schedule* ITERATIONS,
               MODES RUN_MODE,
               double* time)
/*
                      MPI-1 benchmark kernel
                      Benchmarks MPI_Bcast

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])

Output variables:

-time                 (type double*)
                      Timing result per sample
                      (whole loop time / n_sample; 0 when rank == -1)
*/
{
    double    t1, t2;
    int       i, i1;
    Type_Size s_size;
    int       s_num = 0; /* stays 0 if the datatype reports size 0 (was uninitialized) */
    void*     bc_buf;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type, &s_size);
    if (s_size != 0)
        s_num = size / s_size;

    if (c_info->rank != -1) {
        i1 = 0; /* current root */

        for (i = 0; i < N_BARR; i++)
            MPI_Barrier(c_info->communicator);

        t1 = MPI_Wtime();
        for (i = 0; i < ITERATIONS->n_sample; i++) {
            /* Provide that s_buffer is not overwritten:
               the root sends from s_buffer, all others receive into r_buffer */
            bc_buf = (i1 == c_info->rank) ? c_info->s_buffer : c_info->r_buffer;

            ierr = MPI_Bcast((char*)bc_buf + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                             s_num, c_info->s_data_type,
                             i1, c_info->communicator);
            MPI_ERRHAND(ierr);

            CHK_DIFF("Bcast", c_info,
                     (char*)bc_buf + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                     0, size, size, 1, put, 0, ITERATIONS->n_sample, i, i1, &defect);

            /* CHANGE THE ROOT NODE.
               Written as i1 + 1 rather than i1 = (++i1) % n, which modifies
               i1 twice without sequencing (undefined behavior in C). */
            i1 = (i1 + 1) % c_info->num_procs;
        }
        t2 = MPI_Wtime();
        *time = (t2 - t1) / (ITERATIONS->n_sample);
    } else {
        *time = 0.;
    }
}
void IMB_ireduce_scatter(struct comm_info* c_info,
                         int size,
                         struct iter_schedule* ITERATIONS,
                         MODES RUN_MODE,
                         double* time)
/*
                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Ireduce_scatter overlapped with CPU work
                      (IMB_cpu_exploit) between the call and MPI_Wait.

Input variables:

-c_info               (type struct comm_info*)
                      Supplies communicator, rank, buffers, reduction
                      datatype/operation and the reccnt[] table

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      Passed through to IMB_ireduce_scatter_pure

Output variables:

-time                 (type double*)
                      time[0] = pure time reported by IMB_ireduce_scatter_pure
                      time[1] = whole overlapped loop time / n_sample
                      time[2] = accumulated IMB_cpu_exploit time / n_sample
                      (all 0 when rank == -1)
*/
{
    int         i = 0;
    Type_Size   s_size;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0., t_comp = 0., t_ovrlp = 0.;
#ifdef CHECK
    size_t      pos = 0, pos1 = 0, pos2 = 0;
    int         Locsize = 0;

    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->red_data_type, &s_size);

#ifdef CHECK
    if (size > 0) {
        for (i = 0; i < c_info->num_procs; i++) {
            IMB_get_rank_portion(i, c_info->num_procs, size, s_size, &pos1, &pos2);
            if (i == c_info->rank) {
                pos = pos1;
                /* Derive the local element count from this rank's portion
                   instead of reading c_info->reccnt[i]: reccnt[] is only
                   filled for the current size by IMB_ireduce_scatter_pure,
                   which runs later in this function, so it may still hold
                   counts from an earlier call at this point. The formula is
                   the one IMB_ireduce_scatter_pure uses for reccnt[i]. */
                Locsize = s_size * (int)((pos2 - pos1 + 1) / s_size);
            }
        }
    }
#endif /* CHECK */

    if (c_info->rank != -1) {
        /* Measures the pure time and fills c_info->reccnt[] */
        IMB_ireduce_scatter_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);

        /* INITIALIZATION CALL */
        IMB_cpu_exploit(t_pure, 1);

        for (i = 0; i < N_BARR; i++) {
            MPI_Barrier(c_info->communicator);
        }

        t_ovrlp = MPI_Wtime();
        for (i = 0; i < ITERATIONS->n_sample; i++) {
            ierr = MPI_Ireduce_scatter((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                       (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                       c_info->reccnt,
                                       c_info->red_data_type,
                                       c_info->op_type,
                                       c_info->communicator,
                                       &request);
            MPI_ERRHAND(ierr);

            /* CPU work between start and completion of the collective */
            t_comp -= MPI_Wtime();
            IMB_cpu_exploit(t_pure, 0);
            t_comp += MPI_Wtime();

            MPI_Wait(&request, &status);

            CHK_DIFF("Ireduce_scatter", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     pos, Locsize, size, asize, put, 0, ITERATIONS->n_sample, i, -1, &defect);
        }
        t_ovrlp = (MPI_Wtime() - t_ovrlp) / ITERATIONS->n_sample;
        t_comp /= ITERATIONS->n_sample;
    }

    time[0] = t_pure;
    time[1] = t_ovrlp;
    time[2] = t_comp;
}
void IMB_pingping(struct comm_info* c_info,
                  int size,
                  struct iter_schedule* ITERATIONS,
                  MODES RUN_MODE,
                  double* time)
/*
                      MPI-1 benchmark kernel
                      2 process exchange; MPI_Isend + MPI_Recv

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])

Output variables:

-time                 (type double*)
                      Timing result per sample (whole loop time / n_sample;
                      0 for ranks that are neither pair0 nor pair1)
*/
{
    double      t1, t2;
    int         i;
    Type_Size   s_size, r_size;
    /* Counts default to 0 so they are never read uninitialized when a
       datatype reports size 0 */
    int         s_num = 0, r_num = 0;
    int         s_tag, r_tag;
    int         dest, source;
    MPI_Status  stat;
    MPI_Request request;

#ifdef CHECK
    defect = 0;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE's in s_size and r_size */
    MPI_Type_size(c_info->s_data_type, &s_size);
    MPI_Type_size(c_info->r_data_type, &r_size);
    if ((s_size != 0) && (r_size != 0)) {
        s_num = size / s_size;
        r_num = size / r_size;
    }

    s_tag = 1;
    r_tag = c_info->select_tag ? s_tag : MPI_ANY_TAG;

    /* The partner is the other member of the (pair0, pair1) pair;
       dest stays -1 for every rank outside the pair */
    dest = -1;
    if (c_info->rank == c_info->pair0)
        dest = c_info->pair1;
    else if (c_info->rank == c_info->pair1)
        dest = c_info->pair0;

    source = c_info->select_source ? dest : MPI_ANY_SOURCE;

    if (dest != -1) {
        for (i = 0; i < N_BARR; i++)
            MPI_Barrier(c_info->communicator);

        t1 = MPI_Wtime();
        for (i = 0; i < ITERATIONS->n_sample; i++) {
            ierr = MPI_Isend((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                             s_num, c_info->s_data_type,
                             dest, s_tag,
                             c_info->communicator, &request);
            MPI_ERRHAND(ierr);

            ierr = MPI_Recv((char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                            r_num, c_info->r_data_type,
                            source, r_tag,
                            c_info->communicator, &stat);
            MPI_ERRHAND(ierr);

            ierr = MPI_Wait(&request, &stat);
            MPI_ERRHAND(ierr);

            CHK_DIFF("PingPing", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     0, size, size, asize, put, 0, ITERATIONS->n_sample, i, dest, &defect);
        }
        t2 = MPI_Wtime();
        *time = (t2 - t1) / ITERATIONS->n_sample;
    } else {
        *time = 0.;
    }
}
void IMB_shr_spmd_swapm(struct comm_info* c_info,
                        int size,
                        struct iter_schedule* ITERATIONS,
                        MODES RUN_MODE,
                        double* time)
/*
                      MPI-1 benchmark kernel
                      Benchmarks shr_spmd_swapm

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information.
                      sdispl[], sndcnt[], rdispl[] and reccnt[] are
                      overwritten here.

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])

Output variables:

-time                 (type double*)
                      Timing result per sample
                      (whole loop time / n_sample; 0 when rank == -1)
*/
{
    double    t1, t2;
    int       i;
    Type_Size s_size, r_size;
    /* Counts default to 0 so the tables below are well defined even if a
       datatype reports size 0 (they were previously read uninitialized) */
    int       s_num = 0, r_num = 0;
    /* NOTE(review): variable-length arrays; assumes c_info->num_procs > 0 -- confirm */
    MPI_Datatype s_data_types[c_info->num_procs];
    MPI_Datatype r_data_types[c_info->num_procs];
    /* Settings are fetched once, on the first call, and kept across calls */
    static bool handshake, isend, firstpass = true;
    static int  maxreqs;
    int         flow_cntl;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    if (firstpass) {
        firstpass = false;
        shr_swapm_getenv(&handshake, &isend, &maxreqs);
    }

    /* Non-negative maxreqs caps flow control at num_procs; a negative value
       selects num_procs / |maxreqs| with a floor of 2 */
    if (maxreqs >= 0) {
        flow_cntl = min(c_info->num_procs, maxreqs);
    } else {
        flow_cntl = max(2, -1 * c_info->num_procs / maxreqs);
    }

    if (c_info->rank == 0) {
        printf("SPMD_SWAPM: handshake %d isend %d flow_cntl = %d\n", handshake, isend, flow_cntl);
    }

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type, &s_size);
    MPI_Type_size(c_info->r_data_type, &r_size);
    if ((s_size != 0) && (r_size != 0)) {
        s_num = size / s_size;
        r_num = size / r_size;
    }

    /* INITIALIZATION OF DISPLACEMENT and SEND/RECEIVE COUNTS */
    for (i = 0; i < c_info->num_procs; i++) {
        s_data_types[i]   = c_info->s_data_type;
        r_data_types[i]   = c_info->r_data_type;
        c_info->sdispl[i] = s_num * i;
        c_info->sndcnt[i] = s_num;
        c_info->rdispl[i] = r_num * i;
        c_info->reccnt[i] = r_num;
    }

    if (c_info->rank != -1) {
        for (i = 0; i < N_BARR; i++)
            MPI_Barrier(c_info->communicator);

        t1 = MPI_Wtime();
        for (i = 0; i < ITERATIONS->n_sample; i++) {
            ierr = shr_spmd_swapm((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                  c_info->sndcnt, c_info->sdispl,
                                  s_data_types,
                                  (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                  c_info->reccnt, c_info->rdispl,
                                  r_data_types,
                                  c_info->communicator, handshake, isend, flow_cntl);
            MPI_ERRHAND(ierr);

            CHK_DIFF("Alltoallw", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     (size_t) c_info->rank * (size_t) size,
                     0, (size_t) c_info->num_procs * (size_t) size,
                     1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
        }
        t2 = MPI_Wtime();
        *time = (t2 - t1) / ITERATIONS->n_sample;
    } else {
        *time = 0.;
    }
}
void IMB_iallgatherv(struct comm_info* c_info,
                     int size,
                     struct iter_schedule* ITERATIONS,
                     MODES RUN_MODE,
                     double* time)
/*
                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Iallgatherv overlapped with CPU work
                      (IMB_cpu_exploit) between the call and MPI_Wait.

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      Passed through to IMB_iallgatherv_pure

Output variables:

-time                 (type double*)
                      time[0] = pure time reported by IMB_iallgatherv_pure
                      time[1] = whole overlapped loop time / n_sample
                      time[2] = accumulated IMB_cpu_exploit time / n_sample
                      (all 0 when rank == -1)
*/
{
    Type_Size   send_elem, recv_elem;
    int         send_cnt = 0, recv_cnt = 0;
    int         iter = 0;
    MPI_Request req;
    MPI_Status  wait_status;
    double      pure = 0., comp = 0., ovrlp = 0.;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* Element counts are only derived when both type sizes are non-zero */
    MPI_Type_size(c_info->s_data_type, &send_elem);
    MPI_Type_size(c_info->r_data_type, &recv_elem);
    if (!(send_elem == 0 || recv_elem == 0)) {
        send_cnt = size / send_elem;
        recv_cnt = size / recv_elem;
    }

    if (c_info->rank == -1) {
        time[0] = pure;
        time[1] = ovrlp;
        time[2] = comp;
        return;
    }

    /* GET PURE TIME. DISPLACEMENT AND RECEIVE COUNT WILL BE INITIALIZED HERE */
    IMB_iallgatherv_pure(c_info, size, ITERATIONS, RUN_MODE, &pure);

    /* INITIALIZATION CALL */
    IMB_cpu_exploit(pure, 1);

    while (iter < N_BARR) {
        MPI_Barrier(c_info->communicator);
        ++iter;
    }

    ovrlp = MPI_Wtime();
    for (iter = 0; iter < ITERATIONS->n_sample; ++iter) {
        char* send_ptr = (char*)c_info->s_buffer + (iter % ITERATIONS->s_cache_iter) * ITERATIONS->s_offs;
        char* recv_ptr = (char*)c_info->r_buffer + (iter % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs;

        ierr = MPI_Iallgatherv(send_ptr, send_cnt, c_info->s_data_type,
                               recv_ptr, c_info->reccnt, c_info->rdispl, c_info->r_data_type,
                               c_info->communicator, &req);
        MPI_ERRHAND(ierr);

        /* CPU work between start and completion of the collective */
        comp -= MPI_Wtime();
        IMB_cpu_exploit(pure, 0);
        comp += MPI_Wtime();

        MPI_Wait(&req, &wait_status);

        CHK_DIFF("Iallgatherv", c_info, recv_ptr,
                 0, 0, ((size_t)c_info->num_procs * (size_t)size),
                 1, put, 0, ITERATIONS->n_sample, iter, -2, &defect);
    }
    ovrlp = (MPI_Wtime() - ovrlp) / ITERATIONS->n_sample;
    comp /= ITERATIONS->n_sample;

    time[0] = pure;
    time[1] = ovrlp;
    time[2] = comp;
}
void IMB_gatherv(struct comm_info* c_info,
                 int size,
                 struct iter_schedule* ITERATIONS,
                 MODES RUN_MODE,
                 double* time)
/*
                      MPI-1 benchmark kernel
                      Benchmarks MPI_Gatherv

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information.
                      rdispl[] and reccnt[] are overwritten here.

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])

Output variables:

-time                 (type double*)
                      Timing result per sample: sum of the individual
                      MPI_Gatherv call times / n_sample (0 when rank == -1)
*/
{
    double    t1, t2;
    int       i;
    Type_Size s_size, r_size;
    /* Counts default to 0 so the tables below are well defined even if a
       datatype reports size 0 (they were previously read uninitialized) */
    int       s_num = 0, r_num = 0;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type, &s_size);
    MPI_Type_size(c_info->r_data_type, &r_size);
    if ((s_size != 0) && (r_size != 0)) {
        s_num = size / s_size;
        r_num = size / r_size;
    }

    /* INITIALIZATION OF DISPLACEMENT and RECEIVE COUNTS */
    for (i = 0; i < c_info->num_procs; i++) {
        c_info->rdispl[i] = r_num * i;
        c_info->reccnt[i] = r_num;
    }

    *time = 0.;

    if (c_info->rank != -1) {
        int root = 0;

        IMB_do_n_barriers(c_info->communicator, N_BARR);

        for (i = 0; i < ITERATIONS->n_sample; i++) {
            /* Only the collective itself is timed */
            t1 = MPI_Wtime();
            ierr = MPI_Gatherv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                               s_num, c_info->s_data_type,
                               (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                               c_info->reccnt, c_info->rdispl,
                               c_info->r_data_type,
                               root,
                               c_info->communicator);
            MPI_ERRHAND(ierr);
            t2 = MPI_Wtime();
            *time += (t2 - t1);
#ifdef CHECK
            /* Only the current root holds gathered data to verify */
            if (c_info->rank == root) {
                CHK_DIFF("Gatherv", c_info,
                         (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                         0, 0, (size_t) c_info->num_procs * (size_t) size,
                         1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
            }
#endif
            /* Advance the root by root_shift for the next sample */
            root = (root + c_info->root_shift) % c_info->num_procs;
            IMB_do_n_barriers(c_info->communicator, c_info->sync);
        }
        *time /= ITERATIONS->n_sample;
    }
}
void IMB_accumulate (struct comm_info* c_info,
                     int size,
                     struct iter_schedule* ITERATIONS,
                     MODES RUN_MODE,
                     double* time)
/*
                      MPI-2 benchmark kernel
                      Benchmarks MPI_Accumulate

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      Mode (aggregate/non aggregate; blocking/nonblocking);
                      see "IMB_benchmark.h" for definition

Output variables:

-time                 (type double*)
                      Timing result per sample
                      (elapsed time / n_sample; 0 when rank < 0)
*/
{
    Type_Size s_size, r_size;
    /* Counts and target offset default to 0; they are only derived when the
       datatype size is non-zero, which avoids a division by zero. */
    int       s_num = 0, r_num = 0;
    /* IMB 3.1 << */
    int       r_off = 0;
    /* >> IMB 3.1 */
    int       root;
    int       i;

#ifdef CHECK
    defect = 0;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->red_data_type, &s_size);

    /* IMB 3.1 << */
    /* Origin and target use the same datatype, so the receive-side values
       mirror the send-side ones; r_off is the receive cache offset expressed
       in elements, used as target displacement unit below. */
    r_size = s_size;
    if (s_size != 0) {
        s_num = size / s_size;
        r_num = s_num;
        r_off = ITERATIONS->r_offs / r_size;
    }
    /* >> IMB 3.1 */

    /* Rank 0 is the accumulate target; only referenced in CHECK builds */
    root = (c_info->rank == 0);

    if (c_info->rank < 0) {
        *time = 0.;
    } else {
        if (!RUN_MODE->AGGREGATE) {
            /* Non-aggregate mode: one fence after every MPI_Accumulate */
            *time = MPI_Wtime();

            for (i = 0; i < ITERATIONS->n_sample; i++) {
                ierr = MPI_Accumulate((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                      s_num, c_info->red_data_type,
                                      0, i % ITERATIONS->r_cache_iter * r_off,
                                      r_num, c_info->red_data_type, c_info->op_type,
                                      c_info->WIN);
                MPI_ERRHAND(ierr);

                ierr = MPI_Win_fence(0, c_info->WIN);
                MPI_ERRHAND(ierr);
#ifdef CHECK
                if (root) {
                    CHK_DIFF("Accumulate", c_info,
                             (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                             0, size, size, asize, put, 0, ITERATIONS->n_sample, i, -1, &defect);

                    IMB_ass_buf((char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                0, 0, size - 1, 0);
                }
                MPI_Barrier(c_info->communicator);
#endif
            }
            *time = (MPI_Wtime() - *time) / ITERATIONS->n_sample;
        }

        if (RUN_MODE->AGGREGATE) {
            /* Aggregate mode: all MPI_Accumulate calls, then a single fence */
            for (i = 0; i < N_BARR; i++)
                MPI_Barrier(c_info->communicator);

            *time = MPI_Wtime();

            /* CHECK builds issue only r_cache_iter calls, one per cache slot
               verified below; the divisor stays n_sample in both builds */
#ifdef CHECK
            for (i = 0; i < ITERATIONS->r_cache_iter; i++)
#else
            for (i = 0; i < ITERATIONS->n_sample; i++)
#endif
            {
                ierr = MPI_Accumulate((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                      s_num, c_info->red_data_type,
                                      0, i % ITERATIONS->r_cache_iter * r_off,
                                      r_num, c_info->red_data_type, c_info->op_type,
                                      c_info->WIN);
                MPI_ERRHAND(ierr);
            }

            ierr = MPI_Win_fence(0, c_info->WIN);
            MPI_ERRHAND(ierr);

            *time = (MPI_Wtime() - *time) / ITERATIONS->n_sample;
#ifdef CHECK
            if (root) {
                for (i = 0; i < ITERATIONS->r_cache_iter; i++) {
                    CHK_DIFF("Accumulate", c_info,
                             (char*)c_info->r_buffer + i * ITERATIONS->r_offs,
                             0, size, size, asize, put, 0, ITERATIONS->n_sample, i, -1, &defect);
                }
            }
#endif
        }
    }
}
void IMB_exchange(struct comm_info* c_info,
                  int size,
                  struct iter_schedule* ITERATIONS,
                  MODES RUN_MODE,
                  double* time)
/*
                      MPI-1 benchmark kernel
                      Chainwise exchange; MPI_Isend (left+right) + MPI_Recv (right+left)

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])

Output variables:

-time                 (type double*)
                      Timing result per sample (whole loop time / n_sample;
                      0 for ranks outside [0, num_procs-1])
*/
{
    double      t1, t2;
    int         i;
    Type_Size   s_size, r_size;
    /* Counts default to 0 so they are never read uninitialized when a
       datatype reports size 0 */
    int         s_num = 0, r_num = 0;
    int         s_tag, r_tag;
    int         left, right;
    MPI_Status  stat[2];
    MPI_Request request[2];

#ifdef CHECK
    defect = 0;
#endif
    ierr = 0;

    /* Default result; previously *time was left unwritten for a rank that is
       not -1 but also not inside [0, num_procs-1] */
    *time = 0.;

    /* GET SIZE OF DATA TYPE's in s_size and r_size */
    MPI_Type_size(c_info->s_data_type, &s_size);
    MPI_Type_size(c_info->r_data_type, &r_size);
    if ((s_size != 0) && (r_size != 0)) {
        s_num = size / s_size;
        r_num = size / r_size;
    }

    s_tag = 1;
    r_tag = c_info->select_tag ? s_tag : MPI_ANY_TAG;

    if ((c_info->rank >= 0) && (c_info->rank <= c_info->num_procs - 1)) {
        /* Periodic chain neighbours (same wrap-around as the original
           four-way if cascade: last rank's right is 0, rank 0's left is last) */
        right = (c_info->rank + 1) % c_info->num_procs;
        left  = (c_info->rank + c_info->num_procs - 1) % c_info->num_procs;

        for (i = 0; i < N_BARR; i++)
            MPI_Barrier(c_info->communicator);

        t1 = MPI_Wtime();
        for (i = 0; i < ITERATIONS->n_sample; i++) {
            ierr = MPI_Isend((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                             s_num, c_info->s_data_type,
                             right, s_tag, c_info->communicator, &request[0]);
            MPI_ERRHAND(ierr);

            /* The message for the left neighbour starts size bytes further
               into the send buffer */
            ierr = MPI_Isend((char*)c_info->s_buffer + size + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                             s_num, c_info->s_data_type,
                             left, s_tag, c_info->communicator, &request[1]);
            MPI_ERRHAND(ierr);

            ierr = MPI_Recv((char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                            r_num, c_info->r_data_type,
                            left, r_tag, c_info->communicator, stat);
            MPI_ERRHAND(ierr);

            CHK_DIFF("Exchange", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     0, size, size, 1, put, 0, ITERATIONS->n_sample, i, left, &defect);

            /* Second receive reuses the same receive-buffer location */
            ierr = MPI_Recv((char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                            r_num, c_info->r_data_type,
                            right, r_tag, c_info->communicator, stat);
            MPI_ERRHAND(ierr);

            CHK_DIFF("Exchange", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     s_num, size, size, 1, put, 0, ITERATIONS->n_sample, i, right, &defect);

            ierr = MPI_Waitall(2, request, stat);
            MPI_ERRHAND(ierr);
        }
        t2 = MPI_Wtime();
        *time = (t2 - t1) / ITERATIONS->n_sample;
    }
}
void Accumulate (struct comm_info* c_info, int size,int n_sample,MODES RUN_MODE,double* time)
/*************************************************************************/
/*------------------------------------------------------------
             VARIABLE |       TYPE        |   MEANING
------------------------------------------------------------
Input      : c_info   | struct comm_info* | see comm_info.h
             size     | int               | message length in byte
             n_sample | int               | repetition count
             RUN_MODE | MODES (typedef,   | Distinction aggregate/
                      | see Benchmark.h)  | non aggr., see docu.
                      |                   |
Output     : time     | double*           | *time: elapsed MPI_Wtime
                      |                   | difference / n_sample
                      |                   | (0 when rank < 0)
                      |                   |
In/Out     :  -       |  -                |  -
------------------------------------------------------------
Description: Benchmarks MPI_Accumulate onto target rank 0 of
             c_info->WIN, either with one MPI_Win_fence per call
             (non-aggregate) or one fence after all calls (aggregate).
-------------------------------------------------------------*/
{
    Type_Size s_size;
    int       s_num = 0; /* stays 0 if the datatype reports size 0 (was uninitialized) */
    int       root;
    int       i;

#ifdef CHECK
    defect = 0;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->red_data_type, &s_size);
    if (s_size != 0)
        s_num = size / s_size;

    /* Rank 0 is the accumulate target; only referenced in CHECK builds */
    root = (c_info->rank == 0);

    if (c_info->rank < 0) {
        *time = 0.;
    } else {
        if (!RUN_MODE->AGGREGATE) {
            *time = MPI_Wtime();

            for (i = 0; i < n_sample; i++) {
                /* Same origin buffer every time; target displacement advances
                   by s_num elements per sample */
                ierr = MPI_Accumulate(c_info->s_buffer, s_num, c_info->red_data_type,
                                      0, i * s_num, s_num, c_info->red_data_type,
                                      c_info->op_type, c_info->WIN);
                MPI_ERRHAND(ierr);

                ierr = MPI_Win_fence(0, c_info->WIN);
                MPI_ERRHAND(ierr);
#ifdef CHECK
                if (root) {
                    CHK_DIFF("Accumulate", c_info, (void*)(c_info->r_data + i * s_num),
                             0, size, size, asize, put, 0, n_sample, i, -1, &defect);
                    ass_buf(c_info->r_buffer, 0, 0, size - 1, 0);
                }
                MPI_Barrier(c_info->communicator);
#endif
            }
            *time = (MPI_Wtime() - *time) / n_sample;
        }

        if (RUN_MODE->AGGREGATE) {
            for (i = 0; i < N_BARR; i++)
                MPI_Barrier(c_info->communicator);

            *time = MPI_Wtime();

            for (i = 0; i < n_sample; i++) {
                ierr = MPI_Accumulate((void*)(c_info->s_data + i * s_num), s_num, c_info->red_data_type,
                                      0, i * s_num, s_num, c_info->red_data_type,
                                      c_info->op_type, c_info->WIN);
                MPI_ERRHAND(ierr);
            }

            ierr = MPI_Win_fence(0, c_info->WIN);
            MPI_ERRHAND(ierr);

            *time = (MPI_Wtime() - *time) / n_sample;
#ifdef CHECK
            if (root) {
                CHK_DIFF("Accumulate", c_info, c_info->r_buffer,
                         0, n_sample * size, n_sample * size, asize, put, 0, n_sample, -1, -1, &defect);
            }
#endif
        }
    }
}
void IMB_ireduce_scatter_pure(struct comm_info* c_info,
                              int size,
                              struct iter_schedule* ITERATIONS,
                              MODES RUN_MODE,
                              double* time)
/*
                      MPI-NBC benchmark kernel
                      Times MPI_Ireduce_scatter directly followed by MPI_Wait
                      (nothing is executed between the two calls).

Input variables:

-c_info               (type struct comm_info*)
                      Supplies communicator, rank, buffers and reduction
                      datatype/operation. c_info->reccnt[] is overwritten here.

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Supplies n_sample and the buffer cache offsets

-RUN_MODE             (type MODES)
                      Not referenced by this kernel

Output variables:

-time                 (type double*)
                      time[0] = whole loop time / n_sample; 0 when rank == -1
*/
{
    Type_Size   elem_size;
    size_t      first = 0, last = 0;
    int         p = 0, iter;
    MPI_Request req;
    MPI_Status  wait_status;
    double      elapsed = 0.;
#ifdef CHECK
    size_t      pos = 0;
    int         Locsize = 0;

    defect = 0.;
#endif
    ierr = 0;

    MPI_Type_size(c_info->red_data_type, &elem_size);

    /* Fill the per-rank receive counts from each rank's byte portion */
    for (p = 0; p < c_info->num_procs; ++p) {
        if (size <= 0) {
            c_info->reccnt[p] = 0;
#ifdef CHECK
            if (p == c_info->rank) {
                pos = 0;
                Locsize = 0;
            }
#endif
            continue;
        }

        IMB_get_rank_portion(p, c_info->num_procs, size, elem_size, &first, &last);
        c_info->reccnt[p] = (last - first + 1) / elem_size;
#ifdef CHECK
        if (p == c_info->rank) {
            pos = first;
            Locsize = elem_size * c_info->reccnt[p];
        }
#endif
    }

    if (c_info->rank == -1) {
        time[0] = elapsed;
        return;
    }

    for (iter = 0; iter < N_BARR; ++iter)
        MPI_Barrier(c_info->communicator);

    elapsed = MPI_Wtime();
    for (iter = 0; iter < ITERATIONS->n_sample; ++iter) {
        char* send_ptr = (char*)c_info->s_buffer + (iter % ITERATIONS->s_cache_iter) * ITERATIONS->s_offs;
        char* recv_ptr = (char*)c_info->r_buffer + (iter % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs;

        ierr = MPI_Ireduce_scatter(send_ptr, recv_ptr,
                                   c_info->reccnt,
                                   c_info->red_data_type,
                                   c_info->op_type,
                                   c_info->communicator,
                                   &req);
        MPI_ERRHAND(ierr);
        MPI_Wait(&req, &wait_status);

        CHK_DIFF("Ireduce_scatter_pure", c_info, recv_ptr,
                 pos, Locsize, size, asize, put, 0, ITERATIONS->n_sample, iter, -1, &defect);
    }
    elapsed = (MPI_Wtime() - elapsed) / ITERATIONS->n_sample;

    time[0] = elapsed;
}
void IMB_ialltoall_pure(struct comm_info* c_info,
                        int size,
                        struct iter_schedule* ITERATIONS,
                        MODES RUN_MODE,
                        double* time)
/*
                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Ialltoall directly followed by MPI_Wait

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])

Output variables:

-time                 (type double*)
                      time[0] = whole loop time / n_sample; 0 when rank == -1
*/
{
    int         i = 0;
    Type_Size   s_size, r_size;
    /* Both counts start at 0 so r_num is never passed to MPI_Ialltoall
       uninitialized when a datatype reports size 0 */
    int         s_num = 0, r_num = 0;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0.;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE.
       The receive element size must come from r_data_type, which is the type
       handed to MPI_Ialltoall for the receive side. */
    MPI_Type_size(c_info->s_data_type, &s_size);
    MPI_Type_size(c_info->r_data_type, &r_size);
    if ((s_size != 0) && (r_size != 0)) {
        s_num = size / s_size;
        r_num = size / r_size;
    }

    if (c_info->rank != -1) {
        for (i = 0; i < N_BARR; i++) {
            MPI_Barrier(c_info->communicator);
        }

        t_pure = MPI_Wtime();
        for (i = 0; i < ITERATIONS->n_sample; i++) {
            ierr = MPI_Ialltoall((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                 s_num,
                                 c_info->s_data_type,
                                 (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                 r_num,
                                 c_info->r_data_type,
                                 c_info->communicator,
                                 &request);
            MPI_ERRHAND(ierr);
            MPI_Wait(&request, &status);

            CHK_DIFF("Ialltoall_pure", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     ((size_t)c_info->rank * (size_t) size),
                     0, ((size_t)c_info->num_procs * (size_t)size),
                     1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
        }
        t_pure = (MPI_Wtime() - t_pure) / ITERATIONS->n_sample;
    }

    time[0] = t_pure;
}
void IMB_ibcast(struct comm_info* c_info,
                int size,
                struct iter_schedule* ITERATIONS,
                MODES RUN_MODE,
                double* time)
/*
                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Ibcast overlapped with CPU work
                      (IMB_cpu_exploit) between the call and MPI_Wait.

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      Passed through to IMB_ibcast_pure

Output variables:

-time                 (type double*)
                      time[0] = pure time reported by IMB_ibcast_pure
                      time[1] = whole overlapped loop time / n_sample
                      time[2] = accumulated IMB_cpu_exploit time / n_sample
                      (all 0 when rank == -1)
*/
{
    int         i = 0, root = 0;
    Type_Size   s_size;
    int         s_num = 0;
    void*       bc_buf = NULL;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0., t_comp = 0., t_ovrlp = 0.;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type, &s_size);
    if (s_size != 0) {
        s_num = size / s_size;
    }

    if (c_info->rank != -1) {
        IMB_ibcast_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);

        /* INITIALIZATION CALL */
        IMB_cpu_exploit(t_pure, 1);

        root = 0;
        for (i = 0; i < N_BARR; i++) {
            MPI_Barrier(c_info->communicator);
        }

        t_ovrlp = MPI_Wtime();
        for (i = 0; i < ITERATIONS->n_sample; i++) {
            /* The root sends from s_buffer, all others receive into r_buffer */
            bc_buf = (root == c_info->rank) ? c_info->s_buffer : c_info->r_buffer;

            ierr = MPI_Ibcast((char*)bc_buf + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                              s_num,
                              c_info->s_data_type,
                              root,
                              c_info->communicator,
                              &request);
            MPI_ERRHAND(ierr);

            /* CPU work between start and completion of the collective */
            t_comp -= MPI_Wtime();
            IMB_cpu_exploit(t_pure, 0);
            t_comp += MPI_Wtime();

            MPI_Wait(&request, &status);

            CHK_DIFF("Ibcast", c_info,
                     (char*)bc_buf + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                     0, size, size, 1, put, 0, ITERATIONS->n_sample, i, root, &defect);

            /* CHANGE THE ROOT NODE.
               Written as root + 1 rather than root = (++root) % n, which
               modifies root twice without sequencing (undefined behavior in C). */
            root = (root + 1) % c_info->num_procs;
        }
        t_ovrlp = (MPI_Wtime() - t_ovrlp) / ITERATIONS->n_sample;
        t_comp /= ITERATIONS->n_sample;
    }

    time[0] = t_pure;
    time[1] = t_ovrlp;
    time[2] = t_comp;
}
void IMB_allgather(struct comm_info* c_info,
                   int size,
                   struct iter_schedule* ITERATIONS,
                   MODES RUN_MODE,
                   double* time)
/*
                      MPI-1 benchmark kernel
                      Benchmarks MPI_Allgather

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])

Output variables:

-time                 (type double*)
                      Timing result per sample
                      (whole loop time / n_sample; 0 when rank == -1)
*/
{
    double    t1, t2;
    int       i;
    Type_Size s_size, r_size;
    /* Counts default to 0 so they are never read uninitialized when a
       datatype reports size 0 */
    int       s_num = 0, r_num = 0;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type, &s_size);
    MPI_Type_size(c_info->r_data_type, &r_size);
    if ((s_size != 0) && (r_size != 0)) {
        s_num = size / s_size;
        r_num = size / r_size;
    }

    if (c_info->rank != -1) {
        for (i = 0; i < N_BARR; i++)
            MPI_Barrier(c_info->communicator);

        t1 = MPI_Wtime();
        for (i = 0; i < ITERATIONS->n_sample; i++) {
            ierr = MPI_Allgather((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                 s_num, c_info->s_data_type,
                                 (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                 r_num, c_info->r_data_type,
                                 c_info->communicator);
            MPI_ERRHAND(ierr);

            CHK_DIFF("Allgather", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     0, 0, (size_t) c_info->num_procs * (size_t) size,
                     1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
        }
        t2 = MPI_Wtime();
        *time = (t2 - t1) / ITERATIONS->n_sample;
    } else {
        *time = 0.;
    }
}
void IMB_scatterv(struct comm_info* c_info,
                  int size,
                  struct iter_schedule* ITERATIONS,
                  MODES RUN_MODE,
                  double* time)
/*
                      MPI-1 benchmark kernel
                      Benchmarks MPI_Scatterv

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information.
                      sdispl[] and sndcnt[] are overwritten here.

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])

Output variables:

-time                 (type double*)
                      Timing result per sample
                      (whole loop time / n_sample; 0 when rank == -1)
*/
{
    double    t1, t2;
    int       i;
    Type_Size s_size, r_size;
    /* Counts default to 0 so the tables below are well defined even if a
       datatype reports size 0 (they were previously read uninitialized) */
    int       s_num = 0, r_num = 0;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type, &s_size);
    MPI_Type_size(c_info->r_data_type, &r_size);
    if ((s_size != 0) && (r_size != 0)) {
        s_num = size / s_size;
        r_num = size / r_size;
    }

    /* INITIALIZATION OF DISPLACEMENT and SEND COUNTS */
    for (i = 0; i < c_info->num_procs; i++) {
        c_info->sdispl[i] = s_num * i;
        c_info->sndcnt[i] = s_num;
    }

    if (c_info->rank != -1) {
        for (i = 0; i < N_BARR; i++)
            MPI_Barrier(c_info->communicator);

        t1 = MPI_Wtime();
        for (i = 0; i < ITERATIONS->n_sample; i++) {
            ierr = MPI_Scatterv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                c_info->sndcnt, c_info->sdispl,
                                c_info->s_data_type,
                                (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                r_num, c_info->r_data_type,
                                i % c_info->num_procs, /* root = round robin */
                                c_info->communicator);
            MPI_ERRHAND(ierr);

            CHK_DIFF("Scatterv", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     c_info->sdispl[c_info->rank], size, size,
                     1, put, 0, ITERATIONS->n_sample, i, i % c_info->num_procs, &defect);
        }
        t2 = MPI_Wtime();
        *time = (t2 - t1) / ITERATIONS->n_sample;
    } else {
        *time = 0.;
    }
}