/* Partition the items of srclist into one list per depth level.
 *
 * Outputs:
 *   *outlevels - number of depth levels (max depth - min depth + 1)
 *   *outmin    - minimum depth found in srclist
 *   *outlists  - newly allocated array of *outlevels lists; entry i
 *                receives the local items whose depth is (*outmin + i)
 *
 * If any output pointer is NULL the function returns without doing
 * anything.  If the global size of srclist is zero, the outputs are
 * left at 0, -1, and NULL respectively. */
void mfu_flist_array_by_depth(
    mfu_flist srclist,
    int* outlevels,
    int* outmin,
    mfu_flist** outlists)
{
    /* every output pointer must be usable */
    if (outlevels == NULL || outmin == NULL || outlists == NULL) {
        return;
    }

    /* default answer, used when the list is empty */
    *outlevels = 0;
    *outmin    = -1;
    *outlists  = NULL;

    /* nothing to split if there are no items globally */
    if (mfu_flist_global_size(srclist) == 0) {
        return;
    }

    /* the depth range determines how many buckets we need */
    int shallowest = mfu_flist_min_depth(srclist);
    int deepest    = mfu_flist_max_depth(srclist);
    int nlevels    = deepest - shallowest + 1;

    /* one (initially empty) subset list per level */
    mfu_flist* buckets = (mfu_flist*) MFU_MALLOC((size_t)nlevels * sizeof(mfu_flist));
    int level;
    for (level = 0; level < nlevels; level++) {
        buckets[level] = mfu_flist_subset(srclist);
    }

    /* drop each local item into the bucket matching its depth */
    uint64_t nitems = mfu_flist_size(srclist);
    uint64_t item;
    for (item = 0; item < nitems; item++) {
        int bucket = mfu_flist_file_get_depth(srclist, item) - shallowest;
        mfu_flist_file_copy(srclist, item, buckets[bucket]);
    }

    /* finalize every bucket */
    for (level = 0; level < nlevels; level++) {
        mfu_flist_summarize(buckets[level]);
    }

    /* hand results back to the caller */
    *outlevels = nlevels;
    *outmin    = shallowest;
    *outlists  = buckets;
}
/* Build the subset of list holding the regular files whose current
 * striping differs from the requested stripe_count / stripe_size.
 *
 * Items are skipped when they are not regular files, when they are
 * smaller than min_size, or when mfu_stripe_get() fails for them.
 *
 * On return, *total_count and *total_size hold the number of selected
 * files and the sum of their sizes, summed across MPI_COMM_WORLD.
 * Returns the (summarized) filtered list. */
static mfu_flist filter_list(mfu_flist list, int stripe_count, uint64_t stripe_size, uint64_t min_size, uint64_t* total_count, uint64_t* total_size)
{
    /* per-process tallies of what we select */
    uint64_t local_files = 0;
    uint64_t local_bytes = 0;

    /* selected items are collected in a subset of the input list */
    mfu_flist selected = mfu_flist_subset(list);

    uint64_t nitems = mfu_flist_size(list);
    uint64_t item;
    for (item = 0; item < nitems; item++) {
        /* only regular files are candidates */
        if (mfu_flist_file_get_type(list, item) != MFU_TYPE_FILE) {
            continue;
        }

        /* ignore files below the size threshold */
        uint64_t nbytes = mfu_flist_file_get_size(list, item);
        if (nbytes < min_size) {
            continue;
        }

        /* look up the file's current striping; skip it if we can't */
        const char* path = mfu_flist_file_get_name(list, item);
        uint64_t have_size  = 0;
        uint64_t have_count = 0;
        if (mfu_stripe_get(path, &have_size, &have_count) != 0) {
            continue;
        }

        /* TODO: this should probably be better */
        /* a file already striped as requested needs no work */
        if (have_count == stripe_count && have_size == stripe_size) {
            continue;
        }

        /* striping differs, so this file must be restriped */
        mfu_flist_file_copy(list, item, selected);
        local_files += 1;
        local_bytes += nbytes;
    }

    /* finalize the new list */
    mfu_flist_summarize(selected);

    /* combine the per-process tallies into global totals */
    MPI_Allreduce(&local_files, total_count, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
    MPI_Allreduce(&local_bytes, total_size,  1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);

    return selected;
}
/* Sort the items of *pflist across MPI_COMM_WORLD and replace *pflist
 * with the sorted list.
 *
 * sortfields is a comma-separated list of sort keys.  Recognized keys
 * are "name" (ascending by file name) and "-name" (descending by file
 * name).  Unrecognized keys are reported on rank 0 and ignored.  At
 * most MAXFIELDS keys are honored; any further valid keys are reported
 * on rank 0 and ignored.
 *
 * Each item is written into a sort buffer as its key fields followed
 * by its packed list element, the buffer is sorted with DTCMP_Sortz,
 * and the sorted elements are unpacked into a new list.  The input
 * list is freed before returning.
 *
 * Aborts via MFU_ABORT if a DTCMP call fails.  Returns MFU_SUCCESS. */
static int sort_files_readdir(const char* sortfields, mfu_flist* pflist)
{
    /* get list from caller */
    mfu_flist flist = *pflist;

    /* create a new list as subset of original list */
    mfu_flist flist2 = mfu_flist_subset(flist);

    uint64_t incount = mfu_flist_size(flist);
    uint64_t chars   = mfu_flist_file_max_name(flist);

    /* create datatype for packed file list element */
    MPI_Datatype dt_sat;
    size_t bytes = mfu_flist_file_pack_size(flist);
    MPI_Type_contiguous((int)bytes, MPI_BYTE, &dt_sat);

    /* get our rank and the size of comm_world */
    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* build type for file path */
    MPI_Datatype dt_filepath;
    MPI_Type_contiguous((int)chars, MPI_CHAR, &dt_filepath);
    MPI_Type_commit(&dt_filepath);

    /* build comparison op for filenames */
    DTCMP_Op op_filepath;
    if (DTCMP_Op_create(dt_filepath, my_strcmp, &op_filepath) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create sorting operation for filepath");
    }

    /* build reverse comparison op for filenames */
    DTCMP_Op op_filepath_rev;
    if (DTCMP_Op_create(dt_filepath, my_strcmp_rev, &op_filepath_rev) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create reverse sorting operation for filepath");
    }

    /* TODO: process sort fields */
    const int MAXFIELDS = 1;
    MPI_Datatype types[MAXFIELDS];
    DTCMP_Op ops[MAXFIELDS];
    sort_field fields[MAXFIELDS];
    size_t lengths[MAXFIELDS];

    /* start with every slot marked as unused */
    int nfields = 0;
    for (nfields = 0; nfields < MAXFIELDS; nfields++) {
        types[nfields] = MPI_DATATYPE_NULL;
        ops[nfields]   = DTCMP_OP_NULL;
    }
    nfields = 0;

    /* strtok modifies its input, so tokenize a private copy */
    char* sortfields_copy = MFU_STRDUP(sortfields);
    char* token = strtok(sortfields_copy, ",");
    while (token != NULL) {
        /* map the token to a comparison op, if it names a known field */
        int valid = 1;
        DTCMP_Op token_op = DTCMP_OP_NULL;
        if (strcmp(token, "name") == 0) {
            token_op = op_filepath;
        } else if (strcmp(token, "-name") == 0) {
            token_op = op_filepath_rev;
        } else {
            /* invalid token */
            valid = 0;
            if (rank == 0) {
                MFU_LOG(MFU_LOG_ERR, "Invalid sort field: %s\n", token);
            }
        }

        if (valid) {
            /* check capacity BEFORE storing: the per-field arrays hold
             * exactly MAXFIELDS entries, so writing slot nfields when
             * nfields == MAXFIELDS would run past their end */
            if (nfields >= MAXFIELDS) {
                if (rank == 0) {
                    MFU_LOG(MFU_LOG_ERR, "Too many sort fields, ignoring fields starting at: %s\n", token);
                }
                break;
            }

            /* both recognized keys sort on the file name */
            types[nfields]   = dt_filepath;
            ops[nfields]     = token_op;
            fields[nfields]  = FILENAME;
            lengths[nfields] = chars;
            nfields++;
        }

        token = strtok(NULL, ",");
    }
    mfu_free(&sortfields_copy);

    /* build key type */
    MPI_Datatype dt_key;
    if (DTCMP_Type_create_series(nfields, types, &dt_key) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create type for key");
    }

    /* create sort op */
    DTCMP_Op op_key;
    if (DTCMP_Op_create_series(nfields, ops, &op_key) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create sorting operation for key");
    }

    /* build keysat type (key followed by packed list element) */
    MPI_Datatype dt_keysat, keysat_types[2];
    keysat_types[0] = dt_key;
    keysat_types[1] = dt_sat;
    if (DTCMP_Type_create_series(2, keysat_types, &dt_keysat) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create type for keysat");
    }

    /* get extent of key type */
    MPI_Aint key_lb, key_extent;
    MPI_Type_get_extent(dt_key, &key_lb, &key_extent);

    /* get extent of keysat type */
    MPI_Aint keysat_lb, keysat_extent;
    MPI_Type_get_extent(dt_keysat, &keysat_lb, &keysat_extent);

    /* get extent of sat type */
    MPI_Aint sat_lb, sat_extent;
    MPI_Type_get_extent(dt_sat, &sat_lb, &sat_extent);

    /* compute size of sort element and allocate buffer */
    size_t sortbufsize = (size_t)keysat_extent * incount;
    void* sortbuf = MFU_MALLOC(sortbufsize);

    /* copy data into sort elements */
    uint64_t idx = 0;
    char* sortptr = (char*) sortbuf;
    while (idx < incount) {
        /* write the key fields for this item */
        int i;
        for (i = 0; i < nfields; i++) {
            if (fields[i] == FILENAME) {
                const char* name = mfu_flist_file_get_name(flist, idx);
                strcpy(sortptr, name);
            }
            /* each key field occupies a fixed-width slot */
            sortptr += lengths[i];
        }

        /* pack file element right after the key */
        sortptr += mfu_flist_file_pack(sortptr, flist, idx);

        idx++;
    }

    /* sort data */
    void* outsortbuf;
    int outsortcount;
    DTCMP_Handle handle;
    int sort_rc = DTCMP_Sortz(
        sortbuf, (int)incount, &outsortbuf, &outsortcount,
        dt_key, dt_keysat, op_key, DTCMP_FLAG_NONE,
        MPI_COMM_WORLD, &handle
    );
    if (sort_rc != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to sort data");
    }

    /* step through sorted elements, skipping each key and unpacking
     * the list element that follows it into the new list */
    idx = 0;
    sortptr = (char*) outsortbuf;
    while (idx < (uint64_t)outsortcount) {
        sortptr += key_extent;
        sortptr += mfu_flist_file_unpack(sortptr, flist2);
        idx++;
    }

    /* build summary of new list */
    mfu_flist_summarize(flist2);

    /* release the sort handle
     * NOTE(review): presumably this also releases outsortbuf -- confirm
     * against the DTCMP documentation */
    DTCMP_Free(&handle);

    /* free ops */
    DTCMP_Op_free(&op_key);
    DTCMP_Op_free(&op_filepath_rev);
    DTCMP_Op_free(&op_filepath);

    /* free types */
    MPI_Type_free(&dt_keysat);
    MPI_Type_free(&dt_key);
    MPI_Type_free(&dt_filepath);

    /* free input buffer holding sort elements */
    mfu_free(&sortbuf);

    /* free the satellite type */
    MPI_Type_free(&dt_sat);

    /* return new list and free old one */
    *pflist = flist2;
    mfu_flist_free(&flist);

    return MFU_SUCCESS;
}
/* Given an input flist, return a newly created flist holding the items
 * selected by a regular expression.
 *
 *   flist     - list to filter
 *   regex_exp - expression passed to regcomp() with cflags of 0; if
 *               NULL, no filtering is done and the new list is
 *               returned without any items copied into it
 *   exclude   - if nonzero, keep the items that do NOT match
 *               (regexec() returned REG_NOMATCH); otherwise keep the
 *               items that match (regexec() returned 0)
 *   name      - if nonzero, match against each item's basename;
 *               otherwise match against its full path
 *
 * Aborts via MFU_ABORT if the expression fails to compile. */
mfu_flist mfu_flist_filter_regex(mfu_flist flist, const char* regex_exp, int exclude, int name)
{
    /* create our list to return */
    mfu_flist dest = mfu_flist_subset(flist);

    /* check if user passed in an expression, if so then filter the list */
    if (regex_exp != NULL) {
        /* compile regular expression, if it fails print error */
        regex_t regex;
        int regex_return = regcomp(&regex, regex_exp, 0);
        if (regex_return) {
            MFU_ABORT(-1, "Could not compile regex: `%s' rc=%d\n", regex_exp, regex_return);
        }

        /* copy the things that don't or do (based on input) match the
         * regex into a filtered list */
        uint64_t idx;
        uint64_t size = mfu_flist_size(flist);
        for (idx = 0; idx < size; idx++) {
            /* get full path of item */
            const char* file_name = mfu_flist_file_get_name(flist, idx);

            /* execute regex on item, either against the basename or
             * the full path depending on name flag */
            if (name) {
                /* the basename is only needed in this branch, so only
                 * build (and free) the path object here */
                mfu_path* pathname = mfu_path_from_str(file_name);
                mfu_path_basename(pathname);
                char* base = mfu_path_strdup(pathname);

                /* run regex on basename */
                regex_return = regexec(&regex, base, 0, NULL, 0);

                mfu_free(&base);
                mfu_path_delete(&pathname);
            } else {
                /* run regex on full path */
                regex_return = regexec(&regex, file_name, 0, NULL, 0);
            }

            /* copy item to the filtered list */
            if (exclude) {
                /* user wants to exclude items that match, so copy
                 * everything that does not match */
                if (regex_return == REG_NOMATCH) {
                    mfu_flist_file_copy(flist, idx, dest);
                }
            } else {
                /* user wants to copy over any matching items */
                if (regex_return == 0) {
                    mfu_flist_file_copy(flist, idx, dest);
                }
            }
        }

        /* release memory held by the compiled expression */
        regfree(&regex);

        /* summarize the filtered list */
        mfu_flist_summarize(dest);
    }

    /* return the filtered list */
    return dest;
}
/* Print the leading and trailing items of a distributed file list.
 *
 * Items whose global index falls within the first `range` or the last
 * `range` positions of the global list are packed on the rank that
 * holds them and gathered to rank 0, which unpacks them into a
 * temporary list and prints each one with print_file().  When the
 * global list has more than 2 * range items, a "<snip>" marker is
 * printed after the first `range` entries. */
void mfu_flist_print(mfu_flist flist)
{
    /* how many items to show from each end of the list */
    uint64_t range = 10;

    /* buffers large enough for 2 * range packed items */
    size_t elem_bytes = mfu_flist_file_pack_size(flist);
    size_t buf_bytes  = 2 * range * elem_bytes;
    void* sendbuf = MFU_MALLOC(buf_bytes);
    void* recvbuf = MFU_MALLOC(buf_bytes);

    /* who are we, and how many of us are there */
    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* local item count, global item count, and the global index
     * of our first local item */
    uint64_t nlocal  = mfu_flist_size(flist);
    uint64_t nglobal = mfu_flist_global_size(flist);
    uint64_t first   = mfu_flist_global_offset(flist);

    /* pack every local item that lies in the head or tail window,
     * counting how many we contribute as we go */
    int nsend = 0;
    char* cursor = (char*) sendbuf;
    uint64_t item;
    for (item = 0; item < nlocal; item++) {
        uint64_t gidx = first + item;
        int in_head = (gidx < range);
        int in_tail = ((nglobal - gidx) <= range);
        if (in_head || in_tail) {
            cursor += mfu_flist_file_pack(cursor, flist, item);
            nsend++;
        }
    }

    /* per-rank byte counts and displacements for the gather */
    int* counts = (int*) MFU_MALLOC((size_t)ranks * sizeof(int));
    int* disps  = (int*) MFU_MALLOC((size_t)ranks * sizeof(int));

    /* let rank 0 know how many bytes each rank will contribute */
    int bytes = nsend * (int)elem_bytes;
    MPI_Gather(&bytes, 1, MPI_INT, counts, 1, MPI_INT, 0, MPI_COMM_WORLD);

    /* rank 0 lays out the incoming data back to back */
    int recvbytes = 0;
    if (rank == 0) {
        int r;
        for (r = 0; r < ranks; r++) {
            disps[r] = recvbytes;
            recvbytes += counts[r];
        }
    }

    /* collect the packed items on rank 0 */
    MPI_Gatherv(sendbuf, bytes, MPI_BYTE, recvbuf, counts, disps, MPI_BYTE, 0, MPI_COMM_WORLD);

    /* temporary list that will hold the gathered items */
    mfu_flist tmplist = mfu_flist_subset(flist);

    /* rank 0 unpacks one fixed-size element at a time */
    if (rank == 0) {
        char* stop = (char*) recvbuf + recvbytes;
        for (cursor = (char*) recvbuf; cursor < stop; cursor += elem_bytes) {
            mfu_flist_file_unpack(cursor, tmplist);
        }
    }

    /* finalize the temporary list on every rank */
    mfu_flist_summarize(tmplist);

    /* rank 0 prints what it gathered */
    if (rank == 0) {
        printf("\n");
        uint64_t nshown = mfu_flist_size(tmplist);
        uint64_t k;
        for (k = 0; k < nshown; k++) {
            print_file(tmplist, k);

            /* mark the gap between head and tail when items were left out */
            if (k + 1 == range && nglobal > 2 * range) {
                printf("\n<snip>\n\n");
            }
        }
        printf("\n");
    }

    /* release the temporary list and all buffers */
    mfu_flist_free(&tmplist);
    mfu_free(&disps);
    mfu_free(&counts);
    mfu_free(&sendbuf);
    mfu_free(&recvbuf);
}
/* Redistribute the items of a list among the ranks of MPI_COMM_WORLD.
 *
 *   list - input list; its local items are sent to other ranks
 *   map  - callback invoked as map(list, idx, ranks, args) for each
 *          local item; must return the destination rank, a value in
 *          [0, ranks)
 *   args - opaque pointer handed through to map
 *
 * Each local item is packed and exchanged with MPI_Alltoallv, and the
 * items received are unpacked into a new list, which is summarized
 * and returned.
 *
 * Aborts via MFU_ABORT if map returns a rank outside [0, ranks). */
mfu_flist mfu_flist_remap(mfu_flist list, mfu_flist_map_fn map, const void* args)
{
    uint64_t idx;

    /* create new list as subset (actually will be a remapping of
     * input list) */
    mfu_flist newlist = mfu_flist_subset(list);

    /* get number of ranks in job */
    int ranks;
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* allocate arrays for alltoall */
    size_t bufsize = (size_t)ranks * sizeof(int);
    int* sendsizes  = (int*) MFU_MALLOC(bufsize);
    int* senddisps  = (int*) MFU_MALLOC(bufsize);
    int* sendoffset = (int*) MFU_MALLOC(bufsize);
    int* recvsizes  = (int*) MFU_MALLOC(bufsize);
    int* recvdisps  = (int*) MFU_MALLOC(bufsize);

    /* initialize sendsizes and offsets */
    int i;
    for (i = 0; i < ranks; i++) {
        sendsizes[i]  = 0;
        sendoffset[i] = 0;
    }

    /* get number of elements in our local list */
    uint64_t size = mfu_flist_size(list);

    /* allocate space to record file-to-rank mapping */
    int* file2rank = (int*) MFU_MALLOC(size * sizeof(int));

    /* the pack size is queried per list, not per item, so look it up
     * once instead of on every loop iteration */
    size_t packsize = mfu_flist_file_pack_size(list);

    /* call map function for each item to identify its new rank,
     * and compute number of bytes we'll send to each rank */
    size_t sendbytes = 0;
    for (idx = 0; idx < size; idx++) {
        /* determine which rank we'll map this file to */
        int dest = map(list, idx, ranks, args);

        /* dest indexes the per-rank arrays below, so a value outside
         * [0, ranks) would corrupt memory; refuse it outright */
        if (dest < 0 || dest >= ranks) {
            MFU_ABORT(1, "Map function returned invalid rank %d for item %llu (ranks=%d)",
                dest, (unsigned long long) idx, ranks);
        }

        /* cache mapping so we don't have to compute it again
         * below while packing items for send */
        file2rank[idx] = dest;

        /* TODO: check that pack size doesn't overflow int */
        /* total number of bytes we'll send to each rank and the total overall */
        sendsizes[dest] += (int) packsize;
        sendbytes += packsize;
    }

    /* compute send buffer displacements */
    senddisps[0] = 0;
    for (i = 1; i < ranks; i++) {
        senddisps[i] = senddisps[i - 1] + sendsizes[i - 1];
    }

    /* allocate space for send buffer */
    char* sendbuf = (char*) MFU_MALLOC(sendbytes);

    /* copy data into send buffer */
    for (idx = 0; idx < size; idx++) {
        /* determine which rank we mapped this file to */
        int dest = file2rank[idx];

        /* get pointer into send buffer and pack item */
        char* ptr = sendbuf + senddisps[dest] + sendoffset[dest];
        size_t count = mfu_flist_file_pack(ptr, list, idx);

        /* TODO: check that pack size doesn't overflow int */
        /* bump up the offset for this rank */
        sendoffset[dest] += (int) count;
    }

    /* alltoall to get our incoming counts */
    MPI_Alltoall(sendsizes, 1, MPI_INT, recvsizes, 1, MPI_INT, MPI_COMM_WORLD);

    /* compute size of recvbuf and displacements */
    size_t recvbytes = 0;
    recvdisps[0] = 0;
    for (i = 0; i < ranks; i++) {
        recvbytes += (size_t) recvsizes[i];
        if (i > 0) {
            recvdisps[i] = recvdisps[i - 1] + recvsizes[i - 1];
        }
    }

    /* allocate recvbuf */
    char* recvbuf = (char*) MFU_MALLOC(recvbytes);

    /* alltoallv to send data */
    MPI_Alltoallv(
        sendbuf, sendsizes, senddisps, MPI_CHAR,
        recvbuf, recvsizes, recvdisps, MPI_CHAR, MPI_COMM_WORLD
    );

    /* unpack items into new list, advancing by the number of bytes
     * each unpack call reports */
    char* ptr = recvbuf;
    char* recvend = recvbuf + recvbytes;
    while (ptr < recvend) {
        size_t count = mfu_flist_file_unpack(ptr, newlist);
        ptr += count;
    }
    mfu_flist_summarize(newlist);

    /* free memory */
    mfu_free(&file2rank);
    mfu_free(&recvbuf);
    mfu_free(&recvdisps);
    mfu_free(&recvsizes);
    mfu_free(&sendbuf);
    mfu_free(&sendoffset);
    mfu_free(&senddisps);
    mfu_free(&sendsizes);

    /* return list to caller */
    return newlist;
}