void IMB_ibarrier(struct comm_info* c_info,
                  int size,
                  struct iter_schedule* ITERATIONS,
                  MODES RUN_MODE,
                  double* time)
/*

                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Ibarrier overlapped with CPU work

Input variables:

-c_info               (type struct comm_info*)
                      Supplies the communicator and the rank of the caller

-size                 (type int)
                      Only forwarded to IMB_ibarrier_pure

-ITERATIONS           (type struct iter_schedule *)
                      n_sample is the number of timed repetitions

-RUN_MODE             (type MODES)
                      Only forwarded to IMB_ibarrier_pure

Output variables:

-time                 (type double*)
                      time[0]: value delivered by IMB_ibarrier_pure
                      time[1]: elapsed MPI_Wtime over the whole sample loop,
                               divided by n_sample
                      time[2]: accumulated MPI_Wtime spent around
                               IMB_cpu_exploit, divided by n_sample
                      All three are 0 when c_info->rank is -1

*/
{
    int         sample = 0;
    MPI_Request req;
    MPI_Status  stat;
    double      pure_time = 0., comp_time = 0., ovrlp_time = 0.;
    double      loop_start = 0.;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* Callers with rank -1 take no part in the measurement: report zeros */
    if (c_info->rank == -1) {
        time[0] = 0.;
        time[1] = 0.;
        time[2] = 0.;
        return;
    }

    /* Reference timing; its result also parameterizes the CPU load below */
    IMB_ibarrier_pure(c_info, size, ITERATIONS, RUN_MODE, &pure_time);

    /* INITIALIZATION CALL */
    IMB_cpu_exploit(pure_time, 1);

    IMB_do_n_barriers(c_info->communicator, N_BARR);

    loop_start = MPI_Wtime();
    for (sample = 0; sample < ITERATIONS->n_sample; sample++) {
        /* Start the barrier, compute while it is in flight, then complete it */
        ierr = MPI_Ibarrier(c_info->communicator, &req);
        MPI_ERRHAND(ierr);

        comp_time -= MPI_Wtime();
        IMB_cpu_exploit(pure_time, 0);
        comp_time += MPI_Wtime();

        MPI_Wait(&req, &stat);
    }
    ovrlp_time = (MPI_Wtime() - loop_start) / ITERATIONS->n_sample;
    comp_time /= ITERATIONS->n_sample;

    time[0] = pure_time;
    time[1] = ovrlp_time;
    time[2] = comp_time;
}
void IMB_ialltoall(struct comm_info* c_info,
                   int size,
                   struct iter_schedule* ITERATIONS,
                   MODES RUN_MODE,
                   double* time)
/*

                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Ialltoall

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)

Output variables:

-time                 (type double*)
                      Timing result per sample
                      time[0]: value delivered by IMB_ialltoall_pure
                      time[1]: elapsed MPI_Wtime over the whole sample loop,
                               divided by n_sample
                      time[2]: accumulated MPI_Wtime spent around
                               IMB_cpu_exploit, divided by n_sample
                      All three are 0 when c_info->rank is -1

*/
{
    int         sample = 0;
    int         barriers_left = 0;
    Type_Size   send_type_size, recv_type_size;
    int         send_count = 0, recv_count = 0;
    MPI_Request req;
    MPI_Status  stat;
    double      pure_time = 0., comp_time = 0., ovrlp_time = 0.;
    double      loop_start = 0.;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type, &send_type_size);
    MPI_Type_size(c_info->r_data_type, &recv_type_size);

    /* Element counts stay 0 unless both type sizes are non-zero */
    if (!((send_type_size == 0) || (recv_type_size == 0))) {
        send_count = size / send_type_size;
        recv_count = size / recv_type_size;
    }

    /* Callers with rank -1 take no part in the measurement: report zeros */
    if (c_info->rank == -1) {
        time[0] = 0.;
        time[1] = 0.;
        time[2] = 0.;
        return;
    }

    IMB_ialltoall_pure(c_info, size, ITERATIONS, RUN_MODE, &pure_time);

    /* INITIALIZATION CALL */
    IMB_cpu_exploit(pure_time, 1);

    /* N_BARR barriers before the timed loop starts */
    for (barriers_left = N_BARR; barriers_left > 0; barriers_left--) {
        MPI_Barrier(c_info->communicator);
    }

    loop_start = MPI_Wtime();
    for (sample = 0; sample < ITERATIONS->n_sample; sample++) {
        /* Buffer positions cycle with period s_cache_iter / r_cache_iter */
        char* send_ptr = (char*)c_info->s_buffer
                         + (sample % ITERATIONS->s_cache_iter) * ITERATIONS->s_offs;
        char* recv_ptr = (char*)c_info->r_buffer
                         + (sample % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs;

        ierr = MPI_Ialltoall(send_ptr, send_count, c_info->s_data_type,
                             recv_ptr, recv_count, c_info->r_data_type,
                             c_info->communicator, &req);
        MPI_ERRHAND(ierr);

        /* Only the CPU work between start and completion counts as t_comp */
        comp_time -= MPI_Wtime();
        IMB_cpu_exploit(pure_time, 0);
        comp_time += MPI_Wtime();

        MPI_Wait(&req, &stat);

        CHK_DIFF("Ialltoall", c_info, recv_ptr,
                 ((size_t)c_info->rank * (size_t)size),
                 0, ((size_t)c_info->num_procs * (size_t)size),
                 1, put, 0, ITERATIONS->n_sample, sample, -2, &defect);
    }
    ovrlp_time = (MPI_Wtime() - loop_start) / ITERATIONS->n_sample;
    comp_time /= ITERATIONS->n_sample;

    time[0] = pure_time;
    time[1] = ovrlp_time;
    time[2] = comp_time;
}
void IMB_iallreduce(struct comm_info* c_info,
                    int size,
                    struct iter_schedule* ITERATIONS,
                    MODES RUN_MODE,
                    double* time)
/*

                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Iallreduce

Input variables:

-c_info               (type struct comm_info*)
                      Supplies buffers, reduction data type and operation,
                      communicator, rank and the per-sample barrier count
                      (c_info->sync)

-size                 (type int)
                      Message size; divided by the size of
                      c_info->red_data_type to obtain the element count

-ITERATIONS           (type struct iter_schedule *)
                      n_sample repetitions; s_/r_cache_iter and s_/r_offs
                      select the buffer position used by each sample

-RUN_MODE             (type MODES)
                      Only forwarded to IMB_iallreduce_pure

Output variables:

-time                 (type double*)
                      time[0]: value delivered by IMB_iallreduce_pure
                      time[1]: MPI_Wtime accumulated from just before
                               MPI_Iallreduce until MPI_Wait has returned,
                               divided by n_sample
                      time[2]: accumulated MPI_Wtime spent around
                               IMB_cpu_exploit, divided by n_sample
                      All three are 0 when c_info->rank is -1

*/
{
    int         sample = 0;
    Type_Size   elem_size;
    int         elem_count = 0;
    MPI_Request req;
    MPI_Status  stat;
    double      pure_time = 0., comp_time = 0., ovrlp_time = 0.;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->red_data_type, &elem_size);
    if (elem_size != 0) {
        elem_count = size / elem_size;
    }

    /* Callers with rank -1 take no part in the measurement: report zeros */
    if (c_info->rank == -1) {
        time[0] = 0.;
        time[1] = 0.;
        time[2] = 0.;
        return;
    }

    IMB_iallreduce_pure(c_info, size, ITERATIONS, RUN_MODE, &pure_time);

    /* INITIALIZATION CALL */
    IMB_cpu_exploit(pure_time, 1);

    IMB_do_n_barriers(c_info->communicator, N_BARR);

    for (sample = 0; sample < ITERATIONS->n_sample; sample++) {
        char* send_ptr;
        char* recv_ptr;

        /* The overlap window opens here and closes right after MPI_Wait, */
        /* so the check and the trailing barriers are not part of it      */
        ovrlp_time -= MPI_Wtime();

        send_ptr = (char*)c_info->s_buffer
                   + (sample % ITERATIONS->s_cache_iter) * ITERATIONS->s_offs;
        recv_ptr = (char*)c_info->r_buffer
                   + (sample % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs;

        ierr = MPI_Iallreduce(send_ptr, recv_ptr, elem_count,
                              c_info->red_data_type, c_info->op_type,
                              c_info->communicator, &req);
        MPI_ERRHAND(ierr);

        comp_time -= MPI_Wtime();
        IMB_cpu_exploit(pure_time, 0);
        comp_time += MPI_Wtime();

        MPI_Wait(&req, &stat);
        ovrlp_time += MPI_Wtime();

        CHK_DIFF("Iallreduce", c_info, recv_ptr,
                 0, size, size, asize, put, 0,
                 ITERATIONS->n_sample, sample, -1, &defect);

        IMB_do_n_barriers(c_info->communicator, c_info->sync);
    }
    ovrlp_time /= ITERATIONS->n_sample;
    comp_time  /= ITERATIONS->n_sample;

    time[0] = pure_time;
    time[1] = ovrlp_time;
    time[2] = comp_time;
}
void IMB_ibcast(struct comm_info* c_info,
                int size,
                struct iter_schedule* ITERATIONS,
                MODES RUN_MODE,
                double* time)
/*

                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Ibcast

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])

Output variables:

-time                 (type double*)
                      Timing result per sample
                      time[0]: value delivered by IMB_ibcast_pure
                      time[1]: elapsed MPI_Wtime over the whole sample loop,
                               divided by n_sample
                      time[2]: accumulated MPI_Wtime spent around
                               IMB_cpu_exploit, divided by n_sample
                      All three are 0 when c_info->rank is -1

*/
{
    int         i = 0, root = 0;
    Type_Size   s_size;
    int         s_num = 0;
    void*       bc_buf = NULL;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0., t_comp = 0., t_ovrlp = 0.;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type, &s_size);
    if (s_size != 0) {
        s_num = size / s_size;
    }

    if (c_info->rank != -1) {
        IMB_ibcast_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);

        /* INITIALIZATION CALL */
        IMB_cpu_exploit(t_pure, 1);

        root = 0;
        for (i = 0; i < N_BARR; i++) {
            MPI_Barrier(c_info->communicator);
        }

        t_ovrlp = MPI_Wtime();
        for (i = 0; i < ITERATIONS->n_sample; i++) {
            /* The current root broadcasts from the send buffer; every */
            /* other rank receives into its receive buffer             */
            bc_buf = (root == c_info->rank) ? c_info->s_buffer
                                            : c_info->r_buffer;

            ierr = MPI_Ibcast((char*)bc_buf + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                              s_num, c_info->s_data_type, root,
                              c_info->communicator, &request);
            MPI_ERRHAND(ierr);

            /* Only the CPU work between start and completion counts as t_comp */
            t_comp -= MPI_Wtime();
            IMB_cpu_exploit(t_pure, 0);
            t_comp += MPI_Wtime();

            MPI_Wait(&request, &status);

            CHK_DIFF("Ibcast", c_info,
                     (char*)bc_buf + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                     0, size, size, 1, put, 0,
                     ITERATIONS->n_sample, i, root, &defect);

            /* CHANGE THE ROOT NODE */
            /* Written as (root + 1) so that root is modified exactly once; */
            /* "root = (++root) % n" has two unsequenced writes to root     */
            root = (root + 1) % c_info->num_procs;
        }
        t_ovrlp = (MPI_Wtime() - t_ovrlp) / ITERATIONS->n_sample;
        t_comp /= ITERATIONS->n_sample;
    }

    time[0] = t_pure;
    time[1] = t_ovrlp;
    time[2] = t_comp;
}
void IMB_ireduce_scatter(struct comm_info* c_info,
                         int size,
                         struct iter_schedule* ITERATIONS,
                         MODES RUN_MODE,
                         double* time)
/*

                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Ireduce_scatter

Input variables:

-c_info               (type struct comm_info*)
                      Supplies buffers, receive counts (reccnt), reduction
                      data type and operation, communicator, rank and
                      number of processes

-size                 (type int)
                      Message size; forwarded to IMB_ireduce_scatter_pure
                      and, in CHECK builds, to IMB_get_rank_portion

-ITERATIONS           (type struct iter_schedule *)
                      n_sample repetitions; s_/r_cache_iter and s_/r_offs
                      select the buffer position used by each sample

-RUN_MODE             (type MODES)
                      Only forwarded to IMB_ireduce_scatter_pure

Output variables:

-time                 (type double*)
                      time[0]: value delivered by IMB_ireduce_scatter_pure
                      time[1]: elapsed MPI_Wtime over the whole sample loop,
                               divided by n_sample
                      time[2]: accumulated MPI_Wtime spent around
                               IMB_cpu_exploit, divided by n_sample
                      All three are 0 when c_info->rank is -1

*/
{
    int         idx = 0;
    Type_Size   elem_size;
    MPI_Request req;
    MPI_Status  stat;
    double      pure_time = 0., comp_time = 0., ovrlp_time = 0.;
    double      loop_start = 0.;

#ifdef CHECK
    size_t      my_pos = 0, pos1 = 0, pos2 = 0;
    int         loc_size = 0;

    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->red_data_type, &elem_size);

#ifdef CHECK
    /* Remember the position and byte size belonging to the calling rank. */
    /* NOTE(review): c_info->reccnt is read before the pure run below;    */
    /* confirm it already holds the counts for this message size here.    */
    if (size > 0) {
        for (idx = 0; idx < c_info->num_procs; idx++) {
            IMB_get_rank_portion(idx, c_info->num_procs, size, elem_size,
                                 &pos1, &pos2);
            if (idx == c_info->rank) {
                my_pos   = pos1;
                loc_size = elem_size * c_info->reccnt[idx];
            }
        }
    }
#endif // CHECK

    /* Callers with rank -1 take no part in the measurement: report zeros */
    if (c_info->rank == -1) {
        time[0] = 0.;
        time[1] = 0.;
        time[2] = 0.;
        return;
    }

    IMB_ireduce_scatter_pure(c_info, size, ITERATIONS, RUN_MODE, &pure_time);

    /* INITIALIZATION CALL */
    IMB_cpu_exploit(pure_time, 1);

    /* N_BARR barriers before the timed loop starts */
    for (idx = N_BARR; idx > 0; idx--) {
        MPI_Barrier(c_info->communicator);
    }

    loop_start = MPI_Wtime();
    for (idx = 0; idx < ITERATIONS->n_sample; idx++) {
        char* send_ptr = (char*)c_info->s_buffer
                         + (idx % ITERATIONS->s_cache_iter) * ITERATIONS->s_offs;
        char* recv_ptr = (char*)c_info->r_buffer
                         + (idx % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs;

        ierr = MPI_Ireduce_scatter(send_ptr, recv_ptr, c_info->reccnt,
                                   c_info->red_data_type, c_info->op_type,
                                   c_info->communicator, &req);
        MPI_ERRHAND(ierr);

        /* Only the CPU work between start and completion counts as t_comp */
        comp_time -= MPI_Wtime();
        IMB_cpu_exploit(pure_time, 0);
        comp_time += MPI_Wtime();

        MPI_Wait(&req, &stat);

        CHK_DIFF("Ireduce_scatter", c_info, recv_ptr,
                 my_pos, loc_size, size, asize, put, 0,
                 ITERATIONS->n_sample, idx, -1, &defect);
    }
    ovrlp_time = (MPI_Wtime() - loop_start) / ITERATIONS->n_sample;
    comp_time /= ITERATIONS->n_sample;

    time[0] = pure_time;
    time[1] = ovrlp_time;
    time[2] = comp_time;
}
void IMB_igatherv(struct comm_info* c_info,
                  int size,
                  struct iter_schedule* ITERATIONS,
                  MODES RUN_MODE,
                  double* time)
/*

                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Igatherv

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])

Output variables:

-time                 (type double*)
                      Timing result per sample
                      time[0]: value delivered by IMB_igatherv_pure
                      time[1]: MPI_Wtime accumulated from just before
                               MPI_Igatherv until MPI_Wait has returned,
                               divided by n_sample
                      time[2]: accumulated MPI_Wtime spent around
                               IMB_cpu_exploit, divided by n_sample
                      All three are 0 when c_info->rank is -1

*/
{
    int         sample = 0;
    int         root = 0;
    Type_Size   send_type_size, recv_type_size;
    int         send_count = 0, recv_count = 0;
    MPI_Request req;
    MPI_Status  stat;
    double      pure_time = 0., comp_time = 0., ovrlp_time = 0.;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type, &send_type_size);
    MPI_Type_size(c_info->r_data_type, &recv_type_size);

    /* Element counts stay 0 unless both type sizes are non-zero */
    if (!((send_type_size == 0) || (recv_type_size == 0))) {
        send_count = size / send_type_size;
        recv_count = size / recv_type_size;
    }

    /* Callers with rank -1 take no part in the measurement: report zeros */
    if (c_info->rank == -1) {
        time[0] = 0.;
        time[1] = 0.;
        time[2] = 0.;
        return;
    }

    /* GET PURE TIME. DISPLACEMENT AND RECEIVE COUNT WILL BE INITIALIZED HERE */
    IMB_igatherv_pure(c_info, size, ITERATIONS, RUN_MODE, &pure_time);

    /* INITIALIZATION CALL */
    IMB_cpu_exploit(pure_time, 1);

    IMB_do_n_barriers(c_info->communicator, N_BARR);

    for (sample = 0; sample < ITERATIONS->n_sample; sample++) {
        char* send_ptr;
        char* recv_ptr;

        /* The overlap window opens here and closes right after MPI_Wait, */
        /* so the check and the trailing barriers are not part of it      */
        ovrlp_time -= MPI_Wtime();

        send_ptr = (char*)c_info->s_buffer
                   + (sample % ITERATIONS->s_cache_iter) * ITERATIONS->s_offs;
        recv_ptr = (char*)c_info->r_buffer
                   + (sample % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs;

        ierr = MPI_Igatherv(send_ptr, send_count, c_info->s_data_type,
                            recv_ptr, c_info->reccnt, c_info->rdispl,
                            c_info->r_data_type, root,
                            c_info->communicator, &req);
        MPI_ERRHAND(ierr);

        comp_time -= MPI_Wtime();
        IMB_cpu_exploit(pure_time, 0);
        comp_time += MPI_Wtime();

        MPI_Wait(&req, &stat);
        ovrlp_time += MPI_Wtime();

#ifdef CHECK
        /* Only the root of this sample has gathered data to verify */
        if (c_info->rank == root) {
            CHK_DIFF("Igatherv", c_info, recv_ptr,
                     0, 0, ((size_t)c_info->num_procs * (size_t)size),
                     1, put, 0, ITERATIONS->n_sample, sample, -2, &defect);
        }
#endif // CHECK

        /* Advance the root by root_shift, wrapping at num_procs */
        root += c_info->root_shift;
        root %= c_info->num_procs;

        IMB_do_n_barriers(c_info->communicator, c_info->sync);
    }
    ovrlp_time /= ITERATIONS->n_sample;
    comp_time  /= ITERATIONS->n_sample;

    time[0] = pure_time;
    time[1] = ovrlp_time;
    time[2] = comp_time;
}