int split_bin_2level( int bins, int bin, const lwgrp_ring* lev1_ring, const lwgrp_logring* lev1_logring, const lwgrp_ring* lev2_ring, const lwgrp_logring* lev2_logring, lwgrp_ring* new_lev1_ring, lwgrp_logring* new_lev1_logring, lwgrp_ring* new_lev2_ring, lwgrp_logring* new_lev2_logring) { int i; /* initialize new rings and logrings to empty groups, * we'll overwrite these if proc is really in a group */ lwgrp_ring_set_null(new_lev1_ring); lwgrp_ring_set_null(new_lev2_ring); lwgrp_logring_build_from_ring(new_lev1_ring, new_lev1_logring); lwgrp_logring_build_from_ring(new_lev2_ring, new_lev2_logring); if (bins <= 0) { return 0; } /* get our rank within and the size of the parent communicator */ int comm_size; int comm_rank = lev1_ring->comm_rank; MPI_Comm_size(lev1_ring->comm, &comm_size); /* allocate memory to execute collectives */ int* reduce_inbuf = (int*) malloc(bins * sizeof(int)); int* reduce_outbuf = (int*) malloc(bins * sizeof(int)); int* scan_inbuf = (int*) malloc(2 * bins * sizeof(int)); int* scan_recvleft = (int*) malloc(2 * bins * sizeof(int)); int* scan_recvright = (int*) malloc(2 * bins * sizeof(int)); /* intiaize all bins to MPI_PROC_NULL, except for our * bin in which case we list our rank within comm */ for (i = 0; i < bins; i++) { /* initialize all bins to size(lev1), would like MPI_PROC_NULL, * but we use size instead so that reduce(min) does the right thing */ reduce_inbuf[i] = comm_size; } if (bin >= 0) { reduce_inbuf[bin] = comm_rank; } /* reduce to node leader to find lowest rank in each bin */ lwgrp_logring_reduce( reduce_inbuf, reduce_outbuf, bins, MPI_INT, MPI_MIN, 0, lev1_ring, lev1_logring ); /* create the scan type (a rank and a count pair) */ MPI_Datatype scan_type; MPI_Type_contiguous(2, MPI_INT, &scan_type); MPI_Type_commit(&scan_type); /* double exscan across node leaders to * build info for new node leader chains */ int lev1_rank = lev1_ring->group_rank; if (lev1_rank == 0) { /* prepare data for input to double scan, for each bin * record the lowest rank and a count of either 0 or 1 */ for (i = 0; i < bins; i++) { if (reduce_outbuf[i] != comm_size) { scan_inbuf[i*2 + SCAN_RANK] = reduce_outbuf[i]; scan_inbuf[i*2 + SCAN_COUNT] = 1; } else { scan_inbuf[i*2 + SCAN_RANK] = MPI_PROC_NULL; scan_inbuf[i*2 + SCAN_COUNT] = 0; } } /* create the scan operation */ MPI_Op scan_op; int commutative = 0; MPI_Op_create(scan_chain, commutative, &scan_op); /* execute the double exclusive scan to get next rank and * count of ranks to either side for each bin */ lwgrp_logring_double_exscan( scan_inbuf, scan_recvright, scan_inbuf, scan_recvleft, bins, scan_type, scan_op, lev2_ring, lev2_logring ); /* if we're on the end of the level 2 group, need to initialize * the recv values */ int lev2_rank = lev2_ring->group_rank; int lev2_size = lev2_ring->group_size; if (lev2_rank == 0) { /* we're on the left end of lev2 group, so we didn't get * anything from the left side */ for (i = 0; i < bins; i++) { scan_recvleft[i*2 + SCAN_RANK] = MPI_PROC_NULL; scan_recvleft[i*2 + SCAN_COUNT] = 0; } } if (lev2_rank == lev2_size-1) { /* we're on the right end of lev2 group, so we didn't get * anything from the right side */ for (i = 0; i < bins; i++) { scan_recvright[i*2 + SCAN_RANK] = MPI_PROC_NULL; scan_recvright[i*2 + SCAN_COUNT] = 0; } } /* free the scan op */ MPI_Op_free(&scan_op); } /* broadcast scan results to local comm */ lwgrp_logring_bcast(scan_recvleft, bins, scan_type, 0, lev1_ring, lev1_logring); lwgrp_logring_bcast(scan_recvright, bins, scan_type, 0, lev1_ring, lev1_logring); /* free the scan type */ MPI_Type_free(&scan_type); /* call bin_split on local chain */ lwgrp_ring_split_bin_radix(bins, bin, lev1_ring, new_lev1_ring); lwgrp_logring_build_from_ring(new_lev1_ring, new_lev1_logring); /* for each valid bin, all rank 0 procs of new lev1 groups form new lev2 groups */ if (bin >= 0) { int new_lev1_rank = new_lev1_ring->group_rank; if (new_lev1_rank == 0) { /* extract chain values from scan results */ MPI_Comm comm = new_lev1_ring->comm; int left = scan_recvleft[2*bin + SCAN_RANK]; int right = scan_recvright[2*bin + SCAN_RANK]; int size = scan_recvleft[2*bin + SCAN_COUNT] + scan_recvright[2*bin + SCAN_COUNT] + 1; int rank = scan_recvleft[2*bin + SCAN_COUNT]; /* build chain, then ring, then logring, and finally free chain */ lwgrp_chain tmp_chain; lwgrp_chain_build_from_vals(comm, left, right, size, rank, &tmp_chain); lwgrp_ring_build_from_chain(&tmp_chain, new_lev2_ring); lwgrp_logring_build_from_ring(new_lev2_ring, new_lev2_logring); lwgrp_chain_free(&tmp_chain); } } /* free our temporary memory */ free(scan_recvright); free(scan_recvleft); free(scan_inbuf); free(reduce_outbuf); free(reduce_inbuf); return 0; }
int main (int argc, char* argv[]) { int color, key; double start, end; MPI_Comm newcomm; MPI_Init(&argc, &argv); int rank, ranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &ranks); int size; int* members = (int*) malloc(ranks * sizeof(int)); lwgrp_ring group; lwgrp_ring_build_from_comm(MPI_COMM_WORLD, &group); lwgrp_ring group_split; lwgrp_ring_split_bin(2, (rank%2), &group, &group_split); lwgrp_logring logring; lwgrp_logring_build_from_ring(&group, &logring); int* inbuf = (int*) malloc(ranks * sizeof(int)); int* outbuf = (int*) malloc(ranks * sizeof(int)); int i; for (i = 0; i < ranks; i++) { inbuf[i] = rank * ranks + i; outbuf[i] = -1; } lwgrp_logring_barrier(&group, &logring); int bcastbuf = rank; lwgrp_logring_bcast(&bcastbuf, 1, MPI_INT, 0, &group, &logring); lwgrp_logring_allgather_brucks(&rank, members, 1, MPI_INT, &group, &logring); lwgrp_logring_alltoall_brucks(inbuf, outbuf, 1, MPI_INT, &group, &logring); int sum = -1; #if MPI_VERSION >= 2 && MPI_SUBVERSION >= 2 lwgrp_logring_allreduce(&rank, &sum, 1, MPI_INT, MPI_SUM, &group, &logring); lwgrp_logring_scan(&rank, &sum, 1, MPI_INT, MPI_SUM, &group, &logring); lwgrp_logring_exscan(&rank, &sum, 1, MPI_INT, MPI_SUM, &group, &logring); #endif #if 0 #if MPI_VERSION >= 2 && MPI_SUBVERSION >= 2 int myval = rank*2 + 1; int allval; lwgrp_chain_allreduce(&myval, &allval, 1, MPI_INT, MPI_SUM, &group); printf("rank=%d input=%d output=%d\n", rank, myval, allval); fflush(stdout); int ltr_send = 1; int rtl_send = -1; int ltr_recv = 100; int rtl_recv = 100; lwgrp_chain_double_exscan(<r_send, <r_recv, &rtl_send, &rtl_recv, 1, MPI_INT, MPI_SUM, &group); printf("rank=%d ltr_send=%d ltr_recv=%d rtl_send=%d rtl_recv=%d\n", rank, ltr_send, ltr_recv, rtl_send, rtl_recv ); fflush(stdout); #endif #endif lwgrp_ring_free(&group); MPI_Finalize(); return 0; int tag1 = 0; int tag2 = 1; color = 0; key = rank; start = MPI_Wtime(); lwgrp_comm_split_members(MPI_COMM_WORLD, color, key, tag1, tag2, &size, members); end = MPI_Wtime(); print_members(1, rank, size, members); if (rank == 0) { printf("lwgrp_comm_split_members time %f secs\n", end - start); } #if MPI_VERSION >= 2 && MPI_SUBVERSION >= 2 start = MPI_Wtime(); lwgrp_comm_split_create(MPI_COMM_WORLD, color, key, tag1, tag2, &newcomm); end = MPI_Wtime(); print_comm(2, rank, newcomm); if (newcomm != MPI_COMM_NULL) { MPI_Comm_free(&newcomm); } if (rank == 0) { printf("lwgrp_comm_split_create time %f secs\n", end - start); } #endif start = MPI_Wtime(); MPI_Comm_split(MPI_COMM_WORLD, color, key, &newcomm); end = MPI_Wtime(); if (newcomm != MPI_COMM_NULL) { MPI_Comm_free(&newcomm); } if (rank == 0) { printf("MPI_Comm_split time %f secs\n", end - start); } color = 0; key = -rank; start = MPI_Wtime(); lwgrp_comm_split_members(MPI_COMM_WORLD, color, key, tag1, tag2, &size, members); end = MPI_Wtime(); print_members(3, rank, size, members); if (rank == 0) { printf("lwgrp_comm_split_members time %f secs\n", end - start); } #if MPI_VERSION >= 2 && MPI_SUBVERSION >= 2 start = MPI_Wtime(); lwgrp_comm_split_create(MPI_COMM_WORLD, color, key, tag1, tag2, &newcomm); end = MPI_Wtime(); print_comm(4, rank, newcomm); if (newcomm != MPI_COMM_NULL) { MPI_Comm_free(&newcomm); } if (rank == 0) { printf("lwgrp_comm_split_create time %f secs\n", end - start); } #endif start = MPI_Wtime(); MPI_Comm_split(MPI_COMM_WORLD, color, key, &newcomm); end = MPI_Wtime(); if (newcomm != MPI_COMM_NULL) { MPI_Comm_free(&newcomm); } if (rank == 0) { printf("MPI_Comm_split time %f secs\n", end - start); } color = rank; key = -rank; start = MPI_Wtime(); lwgrp_comm_split_members(MPI_COMM_WORLD, color, key, tag1, tag2, &size, members); end = MPI_Wtime(); print_members(5, rank, size, members); if (rank == 0) { printf("lwgrp_comm_split_members time %f secs\n", end - start); } #if MPI_VERSION >= 2 && MPI_SUBVERSION >= 2 start = MPI_Wtime(); lwgrp_comm_split_create(MPI_COMM_WORLD, color, key, tag1, tag2, &newcomm); end = MPI_Wtime(); print_comm(6, rank, newcomm); if (newcomm != MPI_COMM_NULL) { MPI_Comm_free(&newcomm); } if (rank == 0) { printf("lwgrp_comm_split_create time %f secs\n", end - start); } #endif start = MPI_Wtime(); MPI_Comm_split(MPI_COMM_WORLD, color, key, &newcomm); end = MPI_Wtime(); if (newcomm != MPI_COMM_NULL) { MPI_Comm_free(&newcomm); } if (rank == 0) { printf("MPI_Comm_split time %f secs\n", end - start); } color = (rank % 2) ? 0 : MPI_UNDEFINED; key = -rank; start = MPI_Wtime(); lwgrp_comm_split_members(MPI_COMM_WORLD, color, key, tag1, tag2, &size, members); end = MPI_Wtime(); print_members(7, rank, size, members); if (rank == 0) { printf("lwgrp_comm_split_members time %f secs\n", end - start); } #if MPI_VERSION >= 2 && MPI_SUBVERSION >= 2 start = MPI_Wtime(); lwgrp_comm_split_create(MPI_COMM_WORLD, color, key, tag1, tag2, &newcomm); end = MPI_Wtime(); print_comm(8, rank, newcomm); if (newcomm != MPI_COMM_NULL) { MPI_Comm_free(&newcomm); } if (rank == 0) { printf("lwgrp_comm_split_create time %f secs\n", end - start); } #endif start = MPI_Wtime(); MPI_Comm_split(MPI_COMM_WORLD, color, key, &newcomm); end = MPI_Wtime(); if (newcomm != MPI_COMM_NULL) { MPI_Comm_free(&newcomm); } if (rank == 0) { printf("MPI_Comm_split time %f secs\n", end - start); } color = rank % 2; key = rank; start = MPI_Wtime(); lwgrp_comm_split_members(MPI_COMM_WORLD, color, key, tag1, tag2, &size, members); end = MPI_Wtime(); print_members(9, rank, size, members); if (rank == 0) { printf("lwgrp_comm_split_members time %f secs\n", end - start); } #if MPI_VERSION >= 2 && MPI_SUBVERSION >= 2 start = MPI_Wtime(); lwgrp_comm_split_create(MPI_COMM_WORLD, color, key, tag1, tag2, &newcomm); end = MPI_Wtime(); print_comm(10, rank, newcomm); if (newcomm != MPI_COMM_NULL) { MPI_Comm_free(&newcomm); } if (rank == 0) { printf("lwgrp_comm_split_create time %f secs\n", end - start); } #endif start = MPI_Wtime(); MPI_Comm_split(MPI_COMM_WORLD, color, key, &newcomm); end = MPI_Wtime(); if (newcomm != MPI_COMM_NULL) { MPI_Comm_free(&newcomm); } if (rank == 0) { printf("MPI_Comm_split time %f secs\n", end - start); } color = rank / 4; key = rank; start = MPI_Wtime(); lwgrp_comm_split_members(MPI_COMM_WORLD, color, key, tag1, tag2, &size, members); end = MPI_Wtime(); print_members(11, rank, size, members); if (rank == 0) { printf("lwgrp_comm_split_members time %f secs\n", end - start); } #if MPI_VERSION >= 2 && MPI_SUBVERSION >= 2 start = MPI_Wtime(); lwgrp_comm_split_create(MPI_COMM_WORLD, color, key, tag1, tag2, &newcomm); end = MPI_Wtime(); print_comm(12, rank, newcomm); if (newcomm != MPI_COMM_NULL) { MPI_Comm_free(&newcomm); } if (rank == 0) { printf("lwgrp_comm_split_create time %f secs\n", end - start); } #endif start = MPI_Wtime(); MPI_Comm_split(MPI_COMM_WORLD, color, key, &newcomm); end = MPI_Wtime(); if (newcomm != MPI_COMM_NULL) { MPI_Comm_free(&newcomm); } if (rank == 0) { printf("MPI_Comm_split time %f secs\n", end - start); } free(members); MPI_Finalize(); return 0; }