/* Print the leading and trailing entries of a distributed file list.
 * Every rank packs those of its items whose global index falls within
 * the first or last `range` positions of the global list, the packed
 * items are gathered to rank 0, and rank 0 unpacks and prints them. */
void mfu_flist_print(mfu_flist flist)
{
    /* number of items to show from each end of the global list */
    uint64_t range = 10;

    /* send and receive buffers are both sized for the largest possible
     * selection: range items from the front plus range from the back */
    size_t pack_size = mfu_flist_file_pack_size(flist);
    size_t bufsize   = 2 * range * pack_size;
    void* sendbuf = MFU_MALLOC(bufsize);
    void* recvbuf = MFU_MALLOC(bufsize);

    /* look up our rank and the number of ranks in comm_world */
    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* local item count, global item count, and the global index
     * of our first local item */
    uint64_t count  = mfu_flist_size(flist);
    uint64_t total  = mfu_flist_global_size(flist);
    uint64_t offset = mfu_flist_global_offset(flist);

    /* first pass: count how many of our items will be sent */
    int num = 0;
    uint64_t i;
    for (i = 0; i < count; i++) {
        uint64_t pos = offset + i;
        int near_head = (pos < range);
        int near_tail = ((total - pos) <= range);
        if (near_head || near_tail) {
            num++;
        }
    }

    /* per-rank byte counts and displacements for the gatherv */
    int* counts = (int*) MFU_MALLOC((size_t)ranks * sizeof(int));
    int* disps  = (int*) MFU_MALLOC((size_t)ranks * sizeof(int));

    /* let rank 0 know how many bytes each rank contributes */
    int bytes = num * (int)pack_size;
    MPI_Gather(&bytes, 1, MPI_INT, counts, 1, MPI_INT, 0, MPI_COMM_WORLD);

    /* second pass: pack the selected items back to back in sendbuf */
    char* ptr = (char*) sendbuf;
    for (i = 0; i < count; i++) {
        uint64_t pos = offset + i;
        int near_head = (pos < range);
        int near_tail = ((total - pos) <= range);
        if (near_head || near_tail) {
            ptr += mfu_flist_file_pack(ptr, flist, i);
        }
    }

    /* on rank 0, each displacement is the running total of the byte
     * counts that precede it; the final total is the receive size */
    int recvbytes = 0;
    if (rank == 0) {
        int r;
        for (r = 0; r < ranks; r++) {
            disps[r]   = recvbytes;
            recvbytes += counts[r];
        }
    }

    /* collect the packed items on rank 0 */
    MPI_Gatherv(sendbuf, bytes, MPI_BYTE,
                recvbuf, counts, disps, MPI_BYTE,
                0, MPI_COMM_WORLD);

    /* temporary list that receives the unpacked items */
    mfu_flist tmplist = mfu_flist_subset(flist);

    /* rank 0 unpacks the gathered items, stepping pack_size bytes
     * per item */
    if (rank == 0) {
        char* end = (char*) recvbuf + recvbytes;
        for (ptr = (char*) recvbuf; ptr < end; ptr += pack_size) {
            mfu_flist_file_unpack(ptr, tmplist);
        }
    }

    /* summarize the temporary list (called on every rank) */
    mfu_flist_summarize(tmplist);

    /* rank 0 prints the gathered items */
    if (rank == 0) {
        printf("\n");

        uint64_t shown = mfu_flist_size(tmplist);
        uint64_t k;
        for (k = 0; k < shown; k++) {
            print_file(tmplist, k);

            /* after the first `range` items, mark the gap if the
             * global list is too long to be printed in full */
            if (k + 1 == range && total > 2 * range) {
                printf("\n<snip>\n\n");
            }
        }

        printf("\n");
    }

    /* release the temporary list */
    mfu_flist_free(&tmplist);

    /* release buffers */
    mfu_free(&disps);
    mfu_free(&counts);
    mfu_free(&sendbuf);
    mfu_free(&recvbuf);
}
/* Sort the items of a file list by file name using DTCMP, then replace
 * the caller's list with the sorted one.
 *
 * sortfields - comma-separated list of sort keys.  "name" selects the
 *              my_strcmp comparison and "-name" the my_strcmp_rev
 *              (reverse) comparison.  Unrecognized keys are reported by
 *              rank 0 and skipped.  At most MAXFIELDS valid keys are
 *              used; parsing stops at the first valid key beyond that.
 * pflist     - on entry, the list to sort; on return, a newly created
 *              list holding the sorted items.  The input list is freed.
 *
 * Returns MFU_SUCCESS.  Failures while creating DTCMP ops/types or
 * while sorting call MFU_ABORT. */
static int sort_files_readdir(const char* sortfields, mfu_flist* pflist)
{
    /* maximum number of sort keys; an enum constant (rather than a
     * const int) so the arrays below are fixed-size, not VLAs */
    enum { MAXFIELDS = 1 };

    /* get list from caller */
    mfu_flist flist = *pflist;

    /* create a new list as subset of original list */
    mfu_flist flist2 = mfu_flist_subset(flist);

    uint64_t incount = mfu_flist_size(flist);
    uint64_t chars   = mfu_flist_file_max_name(flist);

    /* create datatype for packed file list element; it is used below
     * only as a component of dt_keysat and for an extent query */
    MPI_Datatype dt_sat;
    size_t bytes = mfu_flist_file_pack_size(flist);
    MPI_Type_contiguous((int)bytes, MPI_BYTE, &dt_sat);

    /* get our rank, needed to report invalid fields only once */
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* build type for file path */
    MPI_Datatype dt_filepath;
    MPI_Type_contiguous((int)chars, MPI_CHAR, &dt_filepath);
    MPI_Type_commit(&dt_filepath);

    /* build comparison op for filenames */
    DTCMP_Op op_filepath;
    if (DTCMP_Op_create(dt_filepath, my_strcmp, &op_filepath) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create sorting operation for filepath");
    }

    /* build reverse comparison op for filenames */
    DTCMP_Op op_filepath_rev;
    if (DTCMP_Op_create(dt_filepath, my_strcmp_rev, &op_filepath_rev) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create reverse sorting operation for filepath");
    }

    /* TODO: process sort fields */
    MPI_Datatype types[MAXFIELDS];
    DTCMP_Op     ops[MAXFIELDS];
    sort_field   fields[MAXFIELDS];
    size_t       lengths[MAXFIELDS];

    int nfields = 0;
    for (nfields = 0; nfields < MAXFIELDS; nfields++) {
        types[nfields] = MPI_DATATYPE_NULL;
        ops[nfields]   = DTCMP_OP_NULL;
    }
    nfields = 0;

    /* strtok modifies its input, so tokenize a private copy */
    char* sortfields_copy = MFU_STRDUP(sortfields);
    char* token = strtok(sortfields_copy, ",");
    while (token != NULL) {
        /* map the token to a comparison op */
        DTCMP_Op token_op = DTCMP_OP_NULL;
        int valid = 1;
        if (strcmp(token, "name") == 0) {
            token_op = op_filepath;
        } else if (strcmp(token, "-name") == 0) {
            token_op = op_filepath_rev;
        } else {
            /* invalid token */
            valid = 0;
            if (rank == 0) {
                MFU_LOG(MFU_LOG_ERR, "Invalid sort field: %s\n", token);
            }
        }

        if (valid) {
            /* check capacity BEFORE storing: testing after the store
             * would let one extra field be written past the end of
             * the arrays */
            if (nfields >= MAXFIELDS) {
                /* TODO: print warning if we have too many fields */
                break;
            }

            types[nfields]   = dt_filepath;
            ops[nfields]     = token_op;
            fields[nfields]  = FILENAME;
            lengths[nfields] = chars;
            nfields++;
        }

        token = strtok(NULL, ",");
    }
    mfu_free(&sortfields_copy);

    /* build key type */
    MPI_Datatype dt_key;
    if (DTCMP_Type_create_series(nfields, types, &dt_key) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create type for key");
    }

    /* create sort op */
    DTCMP_Op op_key;
    if (DTCMP_Op_create_series(nfields, ops, &op_key) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create sorting operation for key");
    }

    /* build keysat type: the key followed by the packed file element */
    MPI_Datatype dt_keysat, keysat_types[2];
    keysat_types[0] = dt_key;
    keysat_types[1] = dt_sat;
    if (DTCMP_Type_create_series(2, keysat_types, &dt_keysat) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create type for keysat");
    }

    /* get extent of key type */
    MPI_Aint key_lb, key_extent;
    MPI_Type_get_extent(dt_key, &key_lb, &key_extent);

    /* get extent of keysat type */
    MPI_Aint keysat_lb, keysat_extent;
    MPI_Type_get_extent(dt_keysat, &keysat_lb, &keysat_extent);

    /* get extent of sat type */
    MPI_Aint sat_lb, sat_extent;
    MPI_Type_get_extent(dt_sat, &sat_lb, &sat_extent);

    /* compute size of sort element and allocate buffer */
    size_t sortbufsize = (size_t)keysat_extent * incount;
    void* sortbuf = MFU_MALLOC(sortbufsize);

    /* copy data into sort elements */
    uint64_t idx = 0;
    char* sortptr = (char*) sortbuf;
    while (idx < incount) {
        /* copy in each key field; every field occupies lengths[i]
         * bytes regardless of the actual string length */
        int i;
        for (i = 0; i < nfields; i++) {
            if (fields[i] == FILENAME) {
                /* NOTE(review): assumes the name, including its
                 * terminating NUL, fits within the value returned by
                 * mfu_flist_file_max_name -- confirm */
                const char* name = mfu_flist_file_get_name(flist, idx);
                strcpy(sortptr, name);
            }
            sortptr += lengths[i];
        }

        /* pack file element right after the key */
        sortptr += mfu_flist_file_pack(sortptr, flist, idx);

        idx++;
    }

    /* sort data */
    void* outsortbuf;
    int outsortcount;
    DTCMP_Handle handle;
    int sort_rc = DTCMP_Sortz(
                      sortbuf, (int)incount, &outsortbuf, &outsortcount,
                      dt_key, dt_keysat, op_key, DTCMP_FLAG_NONE,
                      MPI_COMM_WORLD, &handle
                  );
    if (sort_rc != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to sort data");
    }

    /* step through sorted data: skip each key and unpack the file
     * element that follows it into the new list */
    idx = 0;
    sortptr = (char*) outsortbuf;
    while (idx < (uint64_t)outsortcount) {
        sortptr += key_extent;
        sortptr += mfu_flist_file_unpack(sortptr, flist2);
        idx++;
    }

    /* build summary of new list */
    mfu_flist_summarize(flist2);

    /* free the handle returned by the sort */
    DTCMP_Free(&handle);

    /* free ops */
    DTCMP_Op_free(&op_key);
    DTCMP_Op_free(&op_filepath_rev);
    DTCMP_Op_free(&op_filepath);

    /* free types */
    MPI_Type_free(&dt_keysat);
    MPI_Type_free(&dt_key);
    MPI_Type_free(&dt_filepath);

    /* free input buffer holding sort elements */
    mfu_free(&sortbuf);

    /* free the satellite type */
    MPI_Type_free(&dt_sat);

    /* return new list and free old one */
    *pflist = flist2;
    mfu_flist_free(&flist);

    return MFU_SUCCESS;
}
/* Given an input list and a map function pointer, call the map function
 * for each item in the list to identify the rank that item should be
 * sent to, exchange the items among the ranks of MPI_COMM_WORLD, and
 * return a new list holding the items this rank received.
 *
 * list - input list; it is read but not freed here
 * map  - callback invoked as map(list, idx, ranks, args); it must
 *        return a destination rank in the range [0, ranks)
 * args - opaque pointer passed through to map
 *
 * Returns the new list.  Aborts via MFU_ABORT if map returns a rank
 * outside the valid range. */
mfu_flist mfu_flist_remap(mfu_flist list, mfu_flist_map_fn map, const void* args)
{
    uint64_t idx;

    /* create new list as subset (actually will be a remapping of
     * input list) */
    mfu_flist newlist = mfu_flist_subset(list);

    /* get number of ranks in job */
    int ranks;
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* allocate arrays for alltoall */
    size_t bufsize = (size_t)ranks * sizeof(int);
    int* sendsizes  = (int*) MFU_MALLOC(bufsize);
    int* senddisps  = (int*) MFU_MALLOC(bufsize);
    int* sendoffset = (int*) MFU_MALLOC(bufsize);
    int* recvsizes  = (int*) MFU_MALLOC(bufsize);
    int* recvdisps  = (int*) MFU_MALLOC(bufsize);

    /* initialize sendsizes and offsets */
    int i;
    for (i = 0; i < ranks; i++) {
        sendsizes[i]  = 0;
        sendoffset[i] = 0;
    }

    /* get number of elements in our local list */
    uint64_t size = mfu_flist_size(list);

    /* allocate space to record file-to-rank mapping */
    int* file2rank = (int*) MFU_MALLOC(size * sizeof(int));

    /* packed size of one item; it depends only on the list, not on
     * the item, so look it up once rather than per item */
    size_t pack_size = mfu_flist_file_pack_size(list);

    /* call map function for each item to identify its new rank,
     * and compute number of bytes we'll send to each rank */
    size_t sendbytes = 0;
    for (idx = 0; idx < size; idx++) {
        /* determine which rank we'll map this file to */
        int dest = map(list, idx, ranks, args);

        /* dest indexes the per-rank arrays below, so a value outside
         * [0, ranks) would corrupt memory; stop instead */
        if (dest < 0 || dest >= ranks) {
            MFU_ABORT(1, "Map function returned an invalid destination rank");
        }

        /* cache mapping so we don't have to compute it again
         * below while packing items for send */
        file2rank[idx] = dest;

        /* TODO: check that pack size doesn't overflow int */
        /* total number of bytes we'll send to each rank and the total overall */
        sendsizes[dest] += (int) pack_size;
        sendbytes += pack_size;
    }

    /* compute send buffer displacements */
    senddisps[0] = 0;
    for (i = 1; i < ranks; i++) {
        senddisps[i] = senddisps[i - 1] + sendsizes[i - 1];
    }

    /* allocate space for send buffer */
    char* sendbuf = (char*) MFU_MALLOC(sendbytes);

    /* copy data into send buffer */
    for (idx = 0; idx < size; idx++) {
        /* determine which rank we mapped this file to */
        int dest = file2rank[idx];

        /* get pointer into send buffer and pack item; sendoffset
         * tracks how much of dest's region is already filled */
        char* ptr = sendbuf + senddisps[dest] + sendoffset[dest];
        size_t count = mfu_flist_file_pack(ptr, list, idx);

        /* TODO: check that pack size doesn't overflow int */
        /* bump up the offset for this rank */
        sendoffset[dest] += (int) count;
    }

    /* alltoall to get our incoming counts */
    MPI_Alltoall(sendsizes, 1, MPI_INT, recvsizes, 1, MPI_INT, MPI_COMM_WORLD);

    /* compute size of recvbuf and displacements */
    size_t recvbytes = 0;
    recvdisps[0] = 0;
    for (i = 0; i < ranks; i++) {
        recvbytes += (size_t) recvsizes[i];
        if (i > 0) {
            recvdisps[i] = recvdisps[i - 1] + recvsizes[i - 1];
        }
    }

    /* allocate recvbuf */
    char* recvbuf = (char*) MFU_MALLOC(recvbytes);

    /* alltoallv to send data */
    MPI_Alltoallv(
        sendbuf, sendsizes, senddisps, MPI_CHAR,
        recvbuf, recvsizes, recvdisps, MPI_CHAR, MPI_COMM_WORLD
    );

    /* unpack items into new list; each unpack call reports how many
     * bytes it consumed */
    char* ptr = recvbuf;
    char* recvend = recvbuf + recvbytes;
    while (ptr < recvend) {
        size_t count = mfu_flist_file_unpack(ptr, newlist);
        ptr += count;
    }
    mfu_flist_summarize(newlist);

    /* free memory */
    mfu_free(&file2rank);
    mfu_free(&recvbuf);
    mfu_free(&recvdisps);
    mfu_free(&recvsizes);
    mfu_free(&sendbuf);
    mfu_free(&sendoffset);
    mfu_free(&senddisps);
    mfu_free(&sendsizes);

    /* return list to caller */
    return newlist;
}