/* Partition the items of srclist into one list per depth level.
 *
 * Outputs:
 *   outlevels - number of depth levels (max depth - min depth + 1)
 *   outmin    - minimum depth reported for srclist
 *   outlists  - array (obtained from MFU_MALLOC) holding one list per
 *               level; entry i receives the items whose depth equals
 *               (*outmin + i)
 *
 * If any output pointer is NULL the function returns without doing
 * anything.  If mfu_flist_global_size() reports zero items, the outputs
 * are left at 0, -1, and NULL respectively. */
void mfu_flist_array_by_depth(
    mfu_flist srclist,
    int* outlevels,
    int* outmin,
    mfu_flist** outlists)
{
    /* without valid output pointers there is nowhere to put results */
    if (!outlevels || !outmin || !outlists) {
        return;
    }

    /* values reported when there is nothing to split */
    *outlevels = 0;
    *outmin    = -1;
    *outlists  = NULL;

    /* bail out early when the list holds no items at all */
    uint64_t global_count = mfu_flist_global_size(srclist);
    if (global_count == 0) {
        return;
    }

    /* the depth range determines how many per-level lists we need */
    int depth_lo = mfu_flist_min_depth(srclist);
    int depth_hi = mfu_flist_max_depth(srclist);
    int nlevels  = depth_hi - depth_lo + 1;

    /* allocate the array and create one subset list per level */
    mfu_flist* buckets = (mfu_flist*) MFU_MALLOC((size_t)nlevels * sizeof(mfu_flist));
    for (int lev = 0; lev < nlevels; lev++) {
        buckets[lev] = mfu_flist_subset(srclist);
    }

    /* route every local item to the list matching its depth */
    uint64_t local_count = mfu_flist_size(srclist);
    for (uint64_t item = 0; item < local_count; item++) {
        int item_depth = mfu_flist_file_get_depth(srclist, item);
        mfu_flist_file_copy(srclist, item, buckets[item_depth - depth_lo]);
    }

    /* summarize each per-level list now that it is fully populated */
    for (int lev = 0; lev < nlevels; lev++) {
        mfu_flist_summarize(buckets[lev]);
    }

    /* hand results back to the caller */
    *outlevels = nlevels;
    *outmin    = depth_lo;
    *outlists  = buckets;
}
/* Build the subset of list containing the regular files that need to be
 * restriped.
 *
 * A file is selected when it is a regular file, its size is at least
 * min_size, its striping info can be read with mfu_stripe_get(), and its
 * current stripe count or stripe size differs from the requested
 * stripe_count / stripe_size.
 *
 * total_count and total_size receive the sums, across MPI_COMM_WORLD, of
 * the number of selected files and of their sizes in bytes.
 *
 * Returns the summarized filtered list. */
static mfu_flist filter_list(mfu_flist list, int stripe_count, uint64_t stripe_size, uint64_t min_size, uint64_t* total_count, uint64_t* total_size)
{
    /* per-process tallies of selected files and their bytes */
    uint64_t local_files = 0;
    uint64_t local_bytes = 0;

    /* selected items are copied into a subset of the input list */
    mfu_flist selected = mfu_flist_subset(list);

    uint64_t nitems = mfu_flist_size(list);
    uint64_t pos = 0;
    while (pos < nitems) {
        /* remember the current item and advance right away so that the
         * guard clauses below can simply continue */
        uint64_t cur = pos++;

        /* anything that is not a regular file is ignored */
        if (mfu_flist_file_get_type(list, cur) != MFU_TYPE_FILE) {
            continue;
        }

        /* files smaller than the minimum size are skipped */
        uint64_t nbytes = mfu_flist_file_get_size(list, cur);
        if (nbytes < min_size) {
            continue;
        }

        /* look up the current striping; skip the file if that fails */
        const char* path = mfu_flist_file_get_name(list, cur);
        uint64_t have_size  = 0;
        uint64_t have_count = 0;
        if (mfu_stripe_get(path, &have_size, &have_count) != 0) {
            continue;
        }

        /* TODO: this should probably be better */
        /* a file whose striping already matches the request is left alone */
        if (have_count == stripe_count && have_size == stripe_size) {
            continue;
        }

        /* striping differs, so this file needs to be restriped */
        mfu_flist_file_copy(list, cur, selected);
        local_files += 1;
        local_bytes += nbytes;
    }

    /* summarize the new list */
    mfu_flist_summarize(selected);

    /* combine the per-process tallies into totals */
    MPI_Allreduce(&local_files, total_count, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
    MPI_Allreduce(&local_bytes, total_size, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);

    return selected;
}
/* Given an input flist, return a new flist (created with
 * mfu_flist_subset) holding the items selected by a regular expression.
 *
 * Parameters:
 *   flist     - list to filter
 *   regex_exp - regular expression, compiled with regcomp() using
 *               cflags 0; may be NULL
 *   exclude   - non-zero: keep items that do NOT match (regexec returns
 *               REG_NOMATCH); zero: keep items that match
 *   name      - non-zero: match against the item's basename;
 *               zero: match against the full path
 *
 * If the expression fails to compile, MFU_ABORT is invoked.
 *
 * NOTE(review): when regex_exp is NULL no items are copied and the
 * returned list is not summarized here -- confirm callers expect that. */
mfu_flist mfu_flist_filter_regex(mfu_flist flist, const char* regex_exp, int exclude, int name)
{
    /* create our list to return */
    mfu_flist dest = mfu_flist_subset(flist);

    /* without an expression there is nothing to filter */
    if (regex_exp == NULL) {
        return dest;
    }

    /* compile regular expression, if it fails print error */
    regex_t regex;
    int regex_return = regcomp(&regex, regex_exp, 0);
    if (regex_return) {
        MFU_ABORT(-1, "Could not compile regex: `%s' rc=%d\n", regex_exp, regex_return);
    }

    /* copy the things that don't or do (based on input) match the regex
     * into the filtered list */
    uint64_t size = mfu_flist_size(flist);
    for (uint64_t idx = 0; idx < size; idx++) {
        /* get full path of item */
        const char* file_name = mfu_flist_file_get_name(flist, idx);

        if (name) {
            /* the basename is only needed in this mode, so build it
             * (and release it) only here */
            mfu_path* pathname = mfu_path_from_str(file_name);
            mfu_path_basename(pathname);
            char* base = mfu_path_strdup(pathname);

            /* run regex on basename */
            regex_return = regexec(&regex, base, 0, NULL, 0);

            /* free the basename */
            mfu_free(&base);
            mfu_path_delete(&pathname);
        } else {
            /* run regex on full path */
            regex_return = regexec(&regex, file_name, 0, NULL, 0);
        }

        /* when excluding, keep everything that does not match;
         * otherwise keep only the items that match */
        int keep = exclude ? (regex_return == REG_NOMATCH)
                           : (regex_return == 0);
        if (keep) {
            mfu_flist_file_copy(flist, idx, dest);
        }
    }

    /* release resources held by the compiled expression */
    regfree(&regex);

    /* summarize the filtered list */
    mfu_flist_summarize(dest);

    /* return the filtered list */
    return dest;
}