/* Map callback that assigns each list item to a rank so the global list
 * is split into contiguous blocks of nearly equal size.
 *
 * With N = global list size and R = ranks, the first (N % R) ranks each
 * own (N / R) + 1 consecutive items and the remaining ranks own (N / R).
 *
 *   flist - list being remapped
 *   idx   - local index of the item within flist on the calling process
 *   ranks - number of ranks to spread the items over
 *   args  - not referenced by this function
 *
 * Returns the rank that should own the item. */
static int map_spread(mfu_flist flist, uint64_t idx, int ranks, void* args)
{
    uint64_t nranks = (uint64_t) ranks;

    /* position of this item within the global list */
    uint64_t first_global = mfu_flist_global_offset(flist);
    uint64_t position = first_global + idx;

    /* total number of items across all processes */
    uint64_t total = mfu_flist_global_size(flist);

    /* every rank gets at least this many items ... */
    uint64_t small_block = total / nranks;

    /* ... and this many ranks, counting from rank 0, get one more */
    uint64_t big_ranks = total % nranks;
    uint64_t big_block = small_block + 1;

    /* number of items held in total by the ranks owning a larger block */
    uint64_t boundary = big_ranks * big_block;

    if (position < boundary) {
        /* item lands in one of the larger blocks */
        return (int)(position / big_block);
    }

    /* item lands past the larger blocks: skip those ranks, then index
     * into the smaller blocks */
    return (int)(big_ranks + (position - boundary) / small_block);
}
/* For a given depth, evenly spread the items among processes for
 * improved load balancing, then delete the items each process receives.
 *
 * Every process packs the items of its local list into a byte buffer.
 * Each record is one type character ('d' for a directory, 'f' for a
 * regular file or link, 'u' for anything else) followed by the item
 * name and its terminating NUL.  Records are exchanged with
 * MPI_Alltoall / MPI_Alltoallv so that each rank ends up with a
 * contiguous block of the global list, and each received record is
 * handed to remove_type().
 *
 *   flist   - list of items to delete
 *   rmcount - output; set to the number of records this process passed
 *             to remove_type()
 *
 * Calls MPI collectives on MPI_COMM_WORLD, so every rank must call it. */
static void remove_spread(mfu_flist flist, uint64_t* rmcount)
{
    uint64_t idx;

    /* initialize our remove count */
    *rmcount = 0;

    /* get number of ranks in job */
    int ranks;
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* allocate memory for alltoall exchanges */
    size_t bufsize = (size_t)ranks * sizeof(int);
    int* sendcounts = (int*) MFU_MALLOC(bufsize);
    int* sendsizes  = (int*) MFU_MALLOC(bufsize);
    int* senddisps  = (int*) MFU_MALLOC(bufsize);
    int* recvsizes  = (int*) MFU_MALLOC(bufsize);
    int* recvdisps  = (int*) MFU_MALLOC(bufsize);

    /* get number of items */
    uint64_t my_count  = mfu_flist_size(flist);
    uint64_t all_count = mfu_flist_global_size(flist);
    uint64_t offset    = mfu_flist_global_offset(flist);

    /* compute number of bytes we'll send:
     * one type character + name + terminating NUL per item */
    size_t sendbytes = 0;
    for (idx = 0; idx < my_count; idx++) {
        const char* name = mfu_flist_file_get_name(flist, idx);
        size_t len = strlen(name) + 2;
        sendbytes += len;
    }

    /* compute the number of items that each rank should have:
     * the first "extra" ranks get low+1 items, the rest get low */
    uint64_t low   = all_count / (uint64_t)ranks;
    uint64_t extra = all_count - low * (uint64_t)ranks;

    /* compute number that we'll send to each rank and initialize sendsizes and offsets */
    uint64_t i;
    for (i = 0; i < (uint64_t)ranks; i++) {
        /* compute starting element id and count for given rank */
        uint64_t start, num;
        if (i < extra) {
            num = low + 1;
            start = i * num;
        } else {
            num = low;
            start = (i - extra) * num + extra * (low + 1);
        }

        /* compute the number of items we'll send to this task */
        uint64_t sendcnt = 0;
        if (my_count > 0) {
            if (start <= offset && offset < start + num) {
                /* this rank overlaps our range,
                 * and its first element comes at or before our first element */
                sendcnt = num - (offset - start);
                if (my_count < sendcnt) {
                    /* the number the rank could receive from us
                     * is more than we have left */
                    sendcnt = my_count;
                }
            } else if (offset < start && start < offset + my_count) {
                /* this rank overlaps our range,
                 * and our first element comes strictly before its first element */
                sendcnt = my_count - (start - offset);
                if (num < sendcnt) {
                    /* the number the rank can receive from us
                     * is less than we have left */
                    sendcnt = num;
                }
            }
        }

        /* record the number of items we'll send to this task */
        sendcounts[i] = (int) sendcnt;

        /* set sizes and displacements to 0, we'll fix this later */
        sendsizes[i] = 0;
        senddisps[i] = 0;
    }

    /* allocate space */
    char* sendbuf = (char*) MFU_MALLOC(sendbytes);

    /* copy data into buffer */
    int dest = -1;
    int disp = 0;
    for (idx = 0; idx < my_count; idx++) {
        /* get name and type of item */
        const char* name = mfu_flist_file_get_name(flist, idx);
        mfu_filetype type = mfu_flist_file_get_type(flist, idx);

        /* get rank that we're packing data for */
        if (dest == -1) {
            dest = get_first_nonzero(sendcounts, ranks);
            if (dest == -1) {
                /* No rank is left to take this item, which means the
                 * send counts do not add up to our local item count.
                 * Stop packing rather than index the arrays with -1;
                 * we still take part in the collectives below so the
                 * other ranks do not hang. */
                break;
            }

            /* about to copy first item for this rank,
             * record its displacement */
            senddisps[dest] = disp;
        }

        /* identify region to be sent to rank */
        char* path = sendbuf + disp;

        /* first character encodes item type */
        if (type == MFU_TYPE_DIR) {
            path[0] = 'd';
        } else if (type == MFU_TYPE_FILE || type == MFU_TYPE_LINK) {
            path[0] = 'f';
        } else {
            path[0] = 'u';
        }

        /* now copy in the path */
        strcpy(&path[1], name);

        /* TODO: check that we don't overflow the int */
        /* add bytes to sendsizes and increase displacement */
        size_t count = strlen(name) + 2;
        sendsizes[dest] += (int) count;
        disp += (int) count;

        /* decrement the count for this rank */
        sendcounts[dest]--;
        if (sendcounts[dest] == 0) {
            dest = -1;
        }
    }

    /* compute displacements */
    senddisps[0] = 0;
    for (i = 1; i < (uint64_t)ranks; i++) {
        senddisps[i] = senddisps[i - 1] + sendsizes[i - 1];
    }

    /* alltoall to specify incoming counts */
    MPI_Alltoall(sendsizes, 1, MPI_INT, recvsizes, 1, MPI_INT, MPI_COMM_WORLD);

    /* compute size of recvbuf and displacements */
    size_t recvbytes = 0;
    recvdisps[0] = 0;
    for (i = 0; i < (uint64_t)ranks; i++) {
        recvbytes += (size_t) recvsizes[i];
        if (i > 0) {
            recvdisps[i] = recvdisps[i - 1] + recvsizes[i - 1];
        }
    }

    /* allocate recvbuf */
    char* recvbuf = (char*) MFU_MALLOC(recvbytes);

    /* alltoallv to send data */
    MPI_Alltoallv(
        sendbuf, sendsizes, senddisps, MPI_CHAR,
        recvbuf, recvsizes, recvdisps, MPI_CHAR, MPI_COMM_WORLD
    );

    /* delete data */
    char* item = recvbuf;
    while (item < recvbuf + recvbytes) {
        /* get item name and type */
        char type = item[0];
        char* name = &item[1];

        /* delete item */
        remove_type(type, name);

        /* keep tally of number of items we deleted;
         * the parentheses matter: *rmcount++ would advance the pointer
         * and leave the caller's counter untouched */
        (*rmcount)++;

        /* go to next item: type character + name, plus the NUL */
        size_t item_size = strlen(item) + 1;
        item += item_size;
    }

    /* free memory */
    mfu_free(&recvbuf);
    mfu_free(&recvdisps);
    mfu_free(&recvsizes);
    mfu_free(&sendbuf);
    mfu_free(&senddisps);
    mfu_free(&sendsizes);
    mfu_free(&sendcounts);

    return;
}
/* Print the head and tail of a file list.
 *
 * Items whose global index falls within the first or the last 10
 * entries of the list are packed on whichever process holds them and
 * gathered to rank 0.  Rank 0 unpacks them into a temporary list and
 * prints each one with print_file(), emitting a "<snip>" marker after
 * the first 10 entries when the list holds more than 20 items.
 *
 * Calls MPI collectives on MPI_COMM_WORLD, so every rank must call it. */
void mfu_flist_print(mfu_flist flist)
{
    /* number of items to show from each end of the list */
    uint64_t range = 10;

    /* both buffers are sized for the 2 * range items that can be selected */
    size_t pack_size = mfu_flist_file_pack_size(flist);
    size_t bufsize = 2 * range * pack_size;
    void* sendbuf = MFU_MALLOC(bufsize);
    void* recvbuf = MFU_MALLOC(bufsize);

    /* who we are and how many processes are in comm_world */
    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* local item count, global item count, and the global index
     * of our first local item */
    uint64_t local_count  = mfu_flist_size(flist);
    uint64_t global_count = mfu_flist_global_size(flist);
    uint64_t first_global = mfu_flist_global_offset(flist);

    /* tally the local items that sit at either end of the global list */
    int selected = 0;
    uint64_t k;
    for (k = 0; k < local_count; k++) {
        uint64_t g = first_global + k;
        int at_head = (g < range);
        int at_tail = ((global_count - g) <= range);
        if (at_head || at_tail) {
            selected++;
        }
    }

    /* per-rank byte counts and displacements for the gather */
    int* counts = (int*) MFU_MALLOC((size_t)ranks * sizeof(int));
    int* disps  = (int*) MFU_MALLOC((size_t)ranks * sizeof(int));

    /* let rank 0 know how many bytes each process contributes */
    int bytes = selected * (int)pack_size;
    MPI_Gather(&bytes, 1, MPI_INT, counts, 1, MPI_INT, 0, MPI_COMM_WORLD);

    /* serialize the selected items back to back into sendbuf */
    char* cursor = (char*) sendbuf;
    for (k = 0; k < local_count; k++) {
        uint64_t g = first_global + k;
        if (!(g < range || (global_count - g) <= range)) {
            continue;
        }
        cursor += mfu_flist_file_pack(cursor, flist, k);
    }

    /* on rank 0, turn the counts into displacements and a byte total */
    int recvbytes = 0;
    if (rank == 0) {
        int r;
        for (r = 0; r < ranks; r++) {
            disps[r] = recvbytes;
            recvbytes += counts[r];
        }
    }

    /* collect the packed items on rank 0 */
    MPI_Gatherv(sendbuf, bytes, MPI_BYTE, recvbuf, counts, disps, MPI_BYTE, 0, MPI_COMM_WORLD);

    /* temporary list that will hold just the gathered items */
    mfu_flist tmplist = mfu_flist_subset(flist);

    /* rank 0 unpacks each fixed-size record into the temporary list */
    if (rank == 0) {
        char* rec = (char*) recvbuf;
        char* stop = rec + recvbytes;
        for (; rec < stop; rec += pack_size) {
            mfu_flist_file_unpack(rec, tmplist);
        }
    }

    /* summarize list (called on every rank) */
    mfu_flist_summarize(tmplist);

    /* rank 0 prints what it collected */
    if (rank == 0) {
        printf("\n");

        uint64_t shown;
        uint64_t to_show = mfu_flist_size(tmplist);
        for (shown = 0; shown < to_show; shown++) {
            print_file(tmplist, shown);

            /* after the leading entries, flag that the middle of the
             * list was left out */
            if (shown + 1 == range && global_count > 2 * range) {
                printf("\n<snip>\n\n");
            }
        }

        printf("\n");
    }

    /* release the temporary list */
    mfu_flist_free(&tmplist);

    /* release buffers */
    mfu_free(&disps);
    mfu_free(&counts);
    mfu_free(&sendbuf);
    mfu_free(&recvbuf);

    return;
}