/* Enqueue COPY_DATA work items for every regular file in DTAR_flist.
 *
 * A file of `size` bytes is cut into pieces of DTAR_user_opts.chunk_size
 * bytes.  One operation is enqueued per full chunk, and one extra
 * operation is enqueued when there is a trailing partial chunk or when
 * the file has no full chunk at all (so an empty file still gets one
 * work item).  Items that are not regular files are skipped. */
void DTAR_enqueue_copy(CIRCLE_handle *handle)
{
    uint64_t item;
    for (item = 0; item < DTAR_count; item++) {
        /* copy work exists only for regular files */
        if (mfu_flist_file_get_type(DTAR_flist, item) != MFU_TYPE_FILE) {
            continue;
        }

        /* offset handed to each operation: entry offset plus header length */
        uint64_t data_start = DTAR_offsets[item] + DTAR_HDR_LENGTH;
        const char *path = mfu_flist_file_get_name(DTAR_flist, item);
        uint64_t bytes = mfu_flist_file_get_size(DTAR_flist, item);

        /* number of complete chunks in the file */
        uint64_t full_chunks = bytes / DTAR_user_opts.chunk_size;

        /* total operations: full chunks, plus one for a partial tail
         * or for a file that has no full chunk */
        uint64_t total_ops = full_chunks;
        if (full_chunks == 0 || full_chunks * DTAR_user_opts.chunk_size < bytes) {
            total_ops++;
        }

        uint64_t chunk;
        for (chunk = 0; chunk < total_ops; chunk++) {
            char *op = DTAR_encode_operation(COPY_DATA, path, bytes, chunk, data_start);
            handle->enqueue(op);
            mfu_free(&op);
        }
    }
}
static void DTAR_write_header(struct archive* ar, uint64_t idx, uint64_t offset) { /* allocate and entry for this item */ struct archive_entry* entry = archive_entry_new(); /* get file name for this item */ /* fill up entry, FIXME: the uglyness of removing leading slash */ const char* fname = mfu_flist_file_get_name(DTAR_flist, idx); archive_entry_copy_pathname(entry, &fname[1]); if (DTAR_user_opts.preserve) { struct archive* source = archive_read_disk_new(); archive_read_disk_set_standard_lookup(source); int fd = open(fname, O_RDONLY); if (archive_read_disk_entry_from_file(source, entry, fd, NULL) != ARCHIVE_OK) { MFU_LOG(MFU_LOG_ERR, "archive_read_disk_entry_from_file(): %s", archive_error_string(ar)); } archive_read_free(source); close(fd); } else { /* TODO: read stat info from mfu_flist */ struct stat stbuf; mfu_lstat(fname, &stbuf); archive_entry_copy_stat(entry, &stbuf); /* set user name of owner */ const char* uname = mfu_flist_file_get_username(DTAR_flist, idx); archive_entry_set_uname(entry, uname); /* set group name */ const char* gname = mfu_flist_file_get_groupname(DTAR_flist, idx); archive_entry_set_gname(entry, gname); } /* TODO: Seems to be a bug here potentially leading to corrupted * archive files. 
archive_write_free also writes two blocks of * NULL bytes at the end of an archive file, however, each rank * will have a different view of the length of the file, so one * rank may write its NULL blocks over top of the actual data * written by another rank */ /* write entry info to archive */ struct archive* dest = archive_write_new(); archive_write_set_format_pax(dest); if (archive_write_open_fd(dest, DTAR_writer.fd_tar) != ARCHIVE_OK) { MFU_LOG(MFU_LOG_ERR, "archive_write_open_fd(): %s", archive_error_string(ar)); } /* seek to offset in tar archive for this file */ lseek(DTAR_writer.fd_tar, offset, SEEK_SET); /* write header for this item */ if (archive_write_header(dest, entry) != ARCHIVE_OK) { MFU_LOG(MFU_LOG_ERR, "archive_write_header(): %s", archive_error_string(ar)); } archive_entry_free(entry); archive_write_free(dest); }
/* Predicate that runs a shell command for item `idx` of `flist`.
 *
 * arg is a NUL-terminated command template.  The first "{}" in the template
 * is replaced by the item's name.  If the template has no "{}", the command
 * is run unchanged (previously a NULL pointer was passed to snprintf's %s).
 *
 * Returns 1 when system() returns 0, 0 when system() returns non-zero, and
 * -1 when the expanded command does not fit in the internal buffer.
 *
 * NOTE(review): the file name is pasted into a shell command line without
 * quoting or escaping, so names containing shell metacharacters are
 * interpreted by the shell.  Left as is because changing it would alter the
 * command semantics callers rely on. */
int MFU_PRED_EXEC (mfu_flist flist, uint64_t idx, void* arg)
{
    int argmax = 1024*1024;
    int written = 0;
    int ret;

    char* command = MFU_STRDUP((char*) arg);
    char* cmdline = (char*) MFU_MALLOC(argmax);

    /* pieces placed after the command prefix; empty unless "{}" is found */
    const char* name = "";
    const char* tail = "";

    char* subst = strstr(command, "{}");
    if (subst) {
        /* terminate the prefix at "{}" and keep what follows it */
        subst[0] = '\0';
        tail = subst + 2;
        name = mfu_flist_file_get_name(flist, idx);
    }

    written = snprintf(cmdline, (size_t) argmax, "%s%s%s", command, name, tail);

    /* snprintf returns the length it wanted to write; a value equal to the
     * buffer size already means the output was truncated */
    if (written < 0 || written >= argmax) {
        fprintf(stderr, "argument %s to exec too long.\n", (const char*) arg);
        mfu_free(&cmdline);
        mfu_free(&command);
        return -1;
    }

    ret = system(cmdline);

    mfu_free(&cmdline);
    mfu_free(&command);

    return ret ? 0 : 1;
}
/* Each process removes exactly the items it holds in `list`.
 * Directories are passed to remove_type() with code 'd', regular files and
 * links with 'f', and anything else with 'u'.  On return *rmcount is set to
 * the number of local items in the list. */
static void remove_direct(mfu_flist list, uint64_t* rmcount)
{
    uint64_t total = mfu_flist_size(list);

    uint64_t i = 0;
    while (i < total) {
        /* look up the path and kind of this item */
        const char* path = mfu_flist_file_get_name(list, i);
        mfu_filetype kind = mfu_flist_file_get_type(list, i);

        /* translate the kind into the one-character code remove_type takes */
        char code = 'u';
        if (kind == MFU_TYPE_DIR) {
            code = 'd';
        } else if (kind == MFU_TYPE_FILE || kind == MFU_TYPE_LINK) {
            code = 'f';
        }

        remove_type(code, path);
        i++;
    }

    /* report how many items we processed */
    *rmcount = total;
}
/* Build a subset of `list` holding the regular files that need restriping.
 *
 * A file is selected when it is a regular file, its size is at least
 * min_size, its striping can be read with mfu_stripe_get(), and its current
 * stripe count or stripe size differs from the requested values.
 *
 * total_count and total_size receive the sums, across MPI_COMM_WORLD, of the
 * number of selected files and of their sizes.  Returns the new, summarized
 * list. */
static mfu_flist filter_list(mfu_flist list, int stripe_count, uint64_t stripe_size, uint64_t min_size, uint64_t* total_count, uint64_t* total_size)
{
    /* local tallies of selected files and their bytes */
    uint64_t local_files = 0;
    uint64_t local_bytes = 0;

    /* selected items are copied into this subset list */
    mfu_flist selected = mfu_flist_subset(list);

    uint64_t nitems = mfu_flist_size(list);
    uint64_t i;
    for (i = 0; i < nitems; i++) {
        /* regular files only */
        if (mfu_flist_file_get_type(list, i) != MFU_TYPE_FILE) {
            continue;
        }

        /* ignore files smaller than the requested minimum */
        uint64_t bytes = mfu_flist_file_get_size(list, i);
        if (bytes < min_size) {
            continue;
        }

        /* query current striping; skip the file if it cannot be read */
        const char* path = mfu_flist_file_get_name(list, i);
        uint64_t have_size = 0;
        uint64_t have_count = 0;
        if (mfu_stripe_get(path, &have_size, &have_count) != 0) {
            continue;
        }

        /* TODO: this should probably be better */
        /* a file whose striping already matches needs no work */
        if (have_count == stripe_count && have_size == stripe_size) {
            continue;
        }

        /* striping differs: select the file and account for it */
        mfu_flist_file_copy(list, i, selected);
        local_files += 1;
        local_bytes += bytes;
    }

    /* finalize the subset list */
    mfu_flist_summarize(selected);

    /* global totals of file count and bytes */
    MPI_Allreduce(&local_files, total_count, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
    MPI_Allreduce(&local_bytes, total_size, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);

    return selected;
}
/* Given an input file list, stat each file and enqueue details
 * in output file list, skip entries excluded by skip function
 * and skip args.
 *
 * input_flist - list whose item names are read
 * flist       - output list; marked as detailed and filled with one entry
 *               per item that was successfully stat'ed
 * skip_fn     - optional callback; when non-NULL and it returns non-zero
 *               for a name, that item is left out of the output list
 * skip_args   - opaque pointer passed through to skip_fn
 *
 * Items for which mfu_lstat() fails are logged and omitted.  The output
 * list is summarized before returning. */
void mfu_flist_stat(
    mfu_flist input_flist,
    mfu_flist flist,
    mfu_flist_skip_fn skip_fn,
    void *skip_args)
{
    flist_t* file_list = (flist_t*)flist;

    /* we will stat all items in output list, so set detail to 1 */
    file_list->detail = 1;

    /* get user data if needed */
    if (file_list->have_users == 0) {
        mfu_flist_usrgrp_get_users(flist);
    }

    /* get groups data if needed */
    if (file_list->have_groups == 0) {
        mfu_flist_usrgrp_get_groups(flist);
    }

    /* step through each item in input list and stat it */
    uint64_t idx;
    uint64_t size = mfu_flist_size(input_flist);
    for (idx = 0; idx < size; idx++) {
        /* get name of item */
        const char* name = mfu_flist_file_get_name(input_flist, idx);

        /* check whether we should skip this item */
        if (skip_fn != NULL && skip_fn(name, skip_args)) {
            /* skip this file, don't include it in new list;
             * the format needs the name supplied for its %s */
            MFU_LOG(MFU_LOG_INFO, "skip %s", name);
            continue;
        }

        /* stat the item */
        struct stat st;
        int status = mfu_lstat(name, &st);
        if (status != 0) {
            MFU_LOG(MFU_LOG_ERR, "mfu_lstat() failed: `%s' rc=%d (errno=%d %s)",
                name, status, errno, strerror(errno));
            continue;
        }

        /* insert item into output list */
        mfu_flist_insert_stat(flist, name, st.st_mode, &st);
    }

    /* compute global summary */
    mfu_flist_summarize(flist);
}
/* we hash file names based on its parent directory to map all
 * files in the same directory to the same process
 *
 * flist - list holding the item
 * idx   - index of the item within flist
 * ranks - number of ranks to map onto; the result lies in [0, ranks)
 * args  - unused
 *
 * Returns the rank selected for the item. */
static int map_name(mfu_flist flist, uint64_t idx, int ranks, void* args)
{
    (void) args;

    /* get name of item */
    const char* name = mfu_flist_file_get_name(flist, idx);

    /* dirname() may modify its argument, so work on a private copy */
    char* path = MFU_STRDUP(name);

    /* use the pointer dirname() returns: POSIX lets it return a string
     * other than the buffer passed in (for example "." for a bare name),
     * in which case the buffer itself still holds the full name */
    char* dir = dirname(path);

    /* identify rank to send this file to */
    size_t dir_len = strlen(dir);
    uint32_t hash = mfu_hash_jenkins(dir, dir_len);
    int rank = (int)(hash % (uint32_t)ranks);

    /* dir may point into path, so free only after hashing */
    mfu_free(&path);
    return rank;
}
/* print to stdout the stripe size and count of each file in the mfu_flist
 *
 * Rank 0 prints a two-line column header, all ranks then synchronize on a
 * barrier, and each rank prints one row per regular file in its portion of
 * `list`.  Files whose striping cannot be read with mfu_stripe_get() are
 * silently skipped. */
static void stripe_info_report(mfu_flist list)
{
    uint64_t idx;
    uint64_t size = mfu_flist_size(list);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* print header */
    if (rank == 0) {
        printf("%10s %3.3s %8.8s %s\n", "Size", "Cnt", "Str Size", "File Path");
        printf("%10s %3.3s %8.8s %s\n", "----", "---", "--------", "---------");
        fflush(stdout);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* print out file info */
    for (idx = 0; idx < size; idx++) {
        mfu_filetype type = mfu_flist_file_get_type(list, idx);

        /* report striping information for regular files only */
        if (type == MFU_TYPE_FILE) {
            const char* in_path = mfu_flist_file_get_name(list, idx);
            uint64_t stripe_size = 0;
            uint64_t stripe_count = 0;
            char filesize[11];
            char stripesize[9];

            /*
             * attempt to get striping info and print it out,
             * skip the file if we can't get the striping info we seek
             */
            if (mfu_stripe_get(in_path, &stripe_size, &stripe_count) != 0) {
                continue;
            }

            /* format it nicely */
            generate_pretty_size(filesize, sizeof(filesize), mfu_flist_file_get_size(list, idx));
            generate_pretty_size(stripesize, sizeof(stripesize), stripe_size);

            /* print the row; stripe_count is unsigned, so use PRIu64 */
            printf("%10.10s %3" PRIu64 " %8.8s %s\n", filesize, stripe_count, stripesize, in_path);
            fflush(stdout);
        }
    }
}
void DTAR_write_header(struct archive *ar, uint64_t idx, uint64_t offset) { const char * fname = mfu_flist_file_get_name(DTAR_flist, idx); /* fill up entry, FIXME: the uglyness of removing leading slash */ struct archive_entry *entry = archive_entry_new(); archive_entry_copy_pathname(entry, &fname[1]); if (DTAR_user_opts.preserve) { struct archive * source = archive_read_disk_new(); archive_read_disk_set_standard_lookup(source); int fd = open(fname, O_RDONLY); if (archive_read_disk_entry_from_file(source, entry, fd, NULL) != ARCHIVE_OK) { MFU_LOG(MFU_LOG_ERR, "archive_read_disk_entry_from_file(): %s", archive_error_string(ar)); } archive_read_free(source); } else { /* read stat info from mfu_flist */ struct stat stbuf; mfu_lstat(fname, &stbuf); archive_entry_copy_stat(entry, &stbuf); const char* uname = mfu_flist_file_get_username(DTAR_flist, idx); archive_entry_set_uname(entry, uname); const char* gname = mfu_flist_file_get_groupname(DTAR_flist, idx); archive_entry_set_gname(entry, gname); } /* write entry info to archive */ struct archive* dest = archive_write_new(); archive_write_set_format_pax(dest); if (archive_write_open_fd(dest, DTAR_writer.fd_tar) != ARCHIVE_OK) { MFU_LOG(MFU_LOG_ERR, "archive_write_open_fd(): %s", archive_error_string(ar)); } lseek64(DTAR_writer.fd_tar, offset, SEEK_SET); if (archive_write_header(dest, entry) != ARCHIVE_OK) { MFU_LOG(MFU_LOG_ERR, "archive_write_header(): %s", archive_error_string(ar)); } archive_entry_free(entry); archive_write_free(dest); }
/* Enqueue every item of circle_list on the given handle.
 * Each queued string is a one-character type code ('d' for a directory,
 * 'f' for a regular file or link, 'u' otherwise) followed by the item's
 * name.  Names that do not fit in CIRCLE_MAX_STRING_LEN bytes together
 * with the code and the terminating NUL are logged and not enqueued. */
static void remove_create(CIRCLE_handle* handle)
{
    char encoded[CIRCLE_MAX_STRING_LEN];

    uint64_t nitems = mfu_flist_size(circle_list);
    uint64_t i;
    for (i = 0; i < nitems; i++) {
        const char* item_name = mfu_flist_file_get_name(circle_list, i);
        mfu_filetype kind = mfu_flist_file_get_type(circle_list, i);

        /* first byte carries the type code */
        char code = 'u';
        if (kind == MFU_TYPE_DIR) {
            code = 'd';
        } else if (kind == MFU_TYPE_FILE || kind == MFU_TYPE_LINK) {
            code = 'f';
        }
        encoded[0] = code;

        /* bytes needed: type code + name + terminating NUL */
        size_t needed = strlen(item_name) + 2;
        if (needed > CIRCLE_MAX_STRING_LEN) {
            MFU_LOG(MFU_LOG_ERR, "Filename longer than %lu",
                (unsigned long)CIRCLE_MAX_STRING_LEN
            );
            continue;
        }

        /* append the name after the code and hand it to the queue */
        strcpy(encoded + 1, item_name);
        handle->enqueue(encoded);
    }
}
int main(int argc, char* argv[]) { MPI_Init(&argc, &argv); mfu_init(); /* get our rank and number of ranks in the job */ int rank, ranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* pointer to mfu_walk_opts */ mfu_walk_opts_t* walk_opts = mfu_walk_opts_new(); uint64_t idx; int option_index = 0; int usage = 0; int report = 0; unsigned int numpaths = 0; mfu_param_path* paths = NULL; unsigned long long bytes; /* verbose by default */ mfu_debug_level = MFU_LOG_VERBOSE; /* default to 1MB stripe size, stripe across all OSTs, and all files are candidates */ int stripes = -1; uint64_t stripe_size = 1048576; uint64_t min_size = 0; static struct option long_options[] = { {"count", 1, 0, 'c'}, {"size", 1, 0, 's'}, {"minsize", 1, 0, 'm'}, {"report", 0, 0, 'r'}, {"progress", 1, 0, 'P'}, {"verbose", 0, 0, 'v'}, {"quiet", 0, 0, 'q'}, {"help", 0, 0, 'h'}, {0, 0, 0, 0} }; while (1) { int c = getopt_long(argc, argv, "c:s:m:rvqh", long_options, &option_index); if (c == -1) { break; } switch (c) { case 'c': /* stripe count */ stripes = atoi(optarg); break; case 's': /* stripe size in bytes */ if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Failed to parse stripe size: %s", optarg); } MPI_Abort(MPI_COMM_WORLD, 1); } stripe_size = (uint64_t)bytes; break; case 'm': /* min file size in bytes */ if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Failed to parse minimum file size: %s", optarg); } MPI_Abort(MPI_COMM_WORLD, 1); } min_size = (uint64_t)bytes; break; case 'r': /* report striping info */ report = 1; break; case 'P': mfu_progress_timeout = atoi(optarg); break; case 'v': mfu_debug_level = MFU_LOG_VERBOSE; break; case 'q': mfu_debug_level = MFU_LOG_NONE; break; case 'h': /* display usage */ usage = 1; break; case '?': /* display usage */ usage = 1; break; default: if (rank == 0) { printf("?? 
getopt returned character code 0%o ??\n", c); } } } /* check that we got a valid progress value */ if (mfu_progress_timeout < 0) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Seconds in --progress must be non-negative: %d invalid", mfu_progress_timeout); } usage = 1; } /* paths to walk come after the options */ if (optind < argc) { /* determine number of paths specified by user */ numpaths = argc - optind; /* allocate space for each path */ paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path)); /* process each path */ char** p = &argv[optind]; mfu_param_path_set_all((uint64_t)numpaths, (const char**)p, paths); optind += numpaths; } else { usage = 1; } /* if we need to print usage, print it and exit */ if (usage) { if (rank == 0) { print_usage(); } mfu_finalize(); MPI_Finalize(); return 1; } /* nothing to do if lustre support is disabled */ #ifndef LUSTRE_SUPPORT if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Lustre support is disabled."); } MPI_Abort(MPI_COMM_WORLD, 1); #endif /* stripe count must be -1 for all available or greater than 0 */ if (stripes < -1) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Stripe count must be -1 for all servers, 0 for lustre file system default, or a positive value"); } MPI_Abort(MPI_COMM_WORLD, 1); } /* lustre requires stripe sizes to be aligned */ if (stripe_size > 0 && stripe_size % 65536 != 0) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Stripe size must be a multiple of 65536"); } MPI_Abort(MPI_COMM_WORLD, 1); } /* TODO: verify that source / target are on Lustre */ /* walk list of input paths and stat as we walk */ mfu_flist flist = mfu_flist_new(); mfu_flist_walk_param_paths(numpaths, paths, walk_opts, flist); /* filter down our list to files which don't meet our striping requirements */ mfu_flist filtered = filter_list(flist, stripes, stripe_size, min_size, &create_prog_count_total, &stripe_prog_bytes_total); mfu_flist_free(&flist); MPI_Barrier(MPI_COMM_WORLD); /* report the file size and stripe count of all files we 
found */ if (report) { /* report the files in our filtered list */ stripe_info_report(filtered); /* free the paths and our list */ mfu_flist_free(&filtered); mfu_param_path_free_all(numpaths, paths); mfu_free(&paths); /* finalize */ mfu_finalize(); MPI_Finalize(); return 0; } /* generate a global suffix for our temp files and have each node check it's list */ char suffix[8]; uint64_t retry; /* seed our random number generator */ srand(time(NULL)); /* keep trying to make a valid random suffix...*/ do { uint64_t attempt = 0; /* make rank 0 responsible for generating a random suffix */ if (rank == 0) { generate_suffix(suffix, sizeof(suffix)); } /* broadcast the random suffix to all ranks */ MPI_Bcast(suffix, sizeof(suffix), MPI_CHAR, 0, MPI_COMM_WORLD); /* check that the file doesn't already exist */ uint64_t size = mfu_flist_size(filtered); for (idx = 0; idx < size; idx++) { char temp_path[PATH_MAX]; strcpy(temp_path, mfu_flist_file_get_name(filtered, idx)); strcat(temp_path, suffix); if(!mfu_access(temp_path, F_OK)) { /* the file already exists */ attempt = 1; break; } } /* do a reduce to figure out if a rank has a file collision */ MPI_Allreduce(&attempt, &retry, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD); } while(retry != 0); /* initialize progress messages while creating files */ create_prog_count = 0; create_prog = mfu_progress_start(mfu_progress_timeout, 1, MPI_COMM_WORLD, create_progress_fn); /* create new files so we can restripe */ uint64_t size = mfu_flist_size(filtered); for (idx = 0; idx < size; idx++) { char temp_path[PATH_MAX]; strcpy(temp_path, mfu_flist_file_get_name(filtered, idx)); strcat(temp_path, suffix); /* create a striped file at the temp file path */ mfu_stripe_set(temp_path, stripe_size, stripes); /* update our status for file create progress */ create_prog_count++; mfu_progress_update(&create_prog_count, create_prog); } /* finalize file create progress messages */ mfu_progress_complete(&create_prog_count, &create_prog); 
MPI_Barrier(MPI_COMM_WORLD); /* initialize progress messages while copying data */ stripe_prog_bytes = 0; stripe_prog = mfu_progress_start(mfu_progress_timeout, 1, MPI_COMM_WORLD, stripe_progress_fn); /* found a suffix, now we need to break our files into chunks based on stripe size */ mfu_file_chunk* file_chunks = mfu_file_chunk_list_alloc(filtered, stripe_size); mfu_file_chunk* p = file_chunks; while (p != NULL) { /* build path to temp file */ char temp_path[PATH_MAX]; strcpy(temp_path, p->name); strcat(temp_path, suffix); /* write each chunk in our list */ write_file_chunk(p, temp_path); /* move on to next file chunk */ p = p->next; } mfu_file_chunk_list_free(&file_chunks); /* finalize progress messages */ mfu_progress_complete(&stripe_prog_bytes, &stripe_prog); MPI_Barrier(MPI_COMM_WORLD); /* remove input file and rename temp file */ for (idx = 0; idx < size; idx++) { /* build path to temp file */ const char *in_path = mfu_flist_file_get_name(filtered, idx); char out_path[PATH_MAX]; strcpy(out_path, in_path); strcat(out_path, suffix); /* change the mode of the newly restriped file to be the same as the old one */ mode_t mode = (mode_t) mfu_flist_file_get_mode(filtered, idx); if (mfu_chmod(out_path, mode) != 0) { MFU_LOG(MFU_LOG_ERR, "Failed to chmod file %s (%s)", out_path, strerror(errno)); MPI_Abort(MPI_COMM_WORLD, 1); } /* rename the new, restriped file to the old name */ if (rename(out_path, in_path) != 0) { MFU_LOG(MFU_LOG_ERR, "Failed to rename file %s to %s", out_path, in_path); MPI_Abort(MPI_COMM_WORLD, 1); } } /* wait for everyone to finish */ MPI_Barrier(MPI_COMM_WORLD); /* free the walk options */ mfu_walk_opts_delete(&walk_opts); /* free filtered list, path parameters */ mfu_flist_free(&filtered); mfu_param_path_free_all(numpaths, paths); mfu_free(&paths); mfu_finalize(); MPI_Finalize(); return 0; }
/* Print one line describing item `idx` of `flist` to stdout.
 * When the list carries detail, the line shows the formatted mode, user
 * name, group name, size with units, modification time, and path.
 * Otherwise it shows only a three-letter type tag and the path. */
static void print_file(mfu_flist flist, uint64_t idx)
{
    /* path of the item, used by both output forms */
    const char* path = mfu_flist_file_get_name(flist, idx);

    if (! mfu_flist_have_detail(flist)) {
        /* short form: map the item type to its tag */
        mfu_filetype kind = mfu_flist_file_get_type(flist, idx);

        const char* tag = "UNK";
        if (kind == MFU_TYPE_DIR) {
            tag = "DIR";
        } else if (kind == MFU_TYPE_FILE) {
            tag = "REG";
        } else if (kind == MFU_TYPE_LINK) {
            tag = "LNK";
        }

        printf("Type=%s File=%s\n", tag, path);
        return;
    }

    /* long form: gather the recorded attributes */
    mode_t mode = (mode_t) mfu_flist_file_get_mode(flist, idx);

    uint64_t atime_val = mfu_flist_file_get_atime(flist, idx);
    uint64_t mtime_val = mfu_flist_file_get_mtime(flist, idx);
    uint64_t ctime_val = mfu_flist_file_get_ctime(flist, idx);
    uint64_t nbytes    = mfu_flist_file_get_size(flist, idx);

    const char* user  = mfu_flist_file_get_username(flist, idx);
    const char* group = mfu_flist_file_get_groupname(flist, idx);

    /* format the three timestamps; only the modify time is printed, but a
     * failure in any of them blanks all three strings */
    char atime_str[30];
    char mtime_str[30];
    char ctime_str[30];

    time_t atime_t = (time_t) atime_val;
    time_t mtime_t = (time_t) mtime_val;
    time_t ctime_t = (time_t) ctime_val;

    size_t atime_len = strftime(atime_str, sizeof(atime_str) - 1, "%FT%T", localtime(&atime_t));
    size_t mtime_len = strftime(mtime_str, sizeof(mtime_str) - 1, "%b %e %Y %H:%M", localtime(&mtime_t));
    size_t ctime_len = strftime(ctime_str, sizeof(ctime_str) - 1, "%FT%T", localtime(&ctime_t));

    int time_failed = (atime_len == 0) || (mtime_len == 0) || (ctime_len == 0);
    if (time_failed) {
        atime_str[0] = '\0';
        mtime_str[0] = '\0';
        ctime_str[0] = '\0';
    }

    /* mode string */
    char mode_str[11];
    mfu_format_mode(mode, mode_str);

    /* size scaled to a value plus unit string */
    double scaled;
    const char* units;
    mfu_format_bytes(nbytes, &scaled, &units);

    printf("%s %s %s %7.3f %2s %s %s\n",
        mode_str, user, group, scaled, units, mtime_str, path
    );
}
/* for each depth, sort files by filename and then remove, to test
 * whether it matters to limit the number of directories each process
 * has to reference (e.g., locking)
 *
 * list    - items to remove
 * rmcount - set to the number of items this process passed to
 *           remove_type(); set to 0 when the global list is empty
 *
 * Each item is packed as a fixed-width record: `chars` bytes holding the
 * name followed by one byte holding the type code ('d', 'f' or 'u').
 * The records are sorted across MPI_COMM_WORLD with DTCMP_Sortz using the
 * name as the key, and each process removes the records it receives. */
static void remove_sort(mfu_flist list, uint64_t* rmcount)
{
    /* bail out if total count is 0, reporting that nothing was removed */
    uint64_t all_count = mfu_flist_global_size(list);
    if (all_count == 0) {
        *rmcount = 0;
        return;
    }

    /* get maximum file name and number of items */
    int chars = (int) mfu_flist_file_max_name(list);
    uint64_t my_count = mfu_flist_size(list);

    /* create key datatype (filename) and comparison op */
    MPI_Datatype dt_key;
    DTCMP_Op op_str;
    DTCMP_Str_create_ascend(chars, &dt_key, &op_str);

    /* create keysat datatype (filename + type) */
    MPI_Datatype types[2], dt_keysat;
    types[0] = dt_key;
    types[1] = MPI_CHAR;
    DTCMP_Type_create_series(2, types, &dt_keysat);

    /* allocate send buffer; widen before multiplying so the product is
     * computed in size_t rather than overflowing int */
    int sendcount = (int) my_count;
    size_t sendbufsize = (size_t) sendcount * ((size_t) chars + 1);
    char* sendbuf = (char*) MFU_MALLOC(sendbufsize);

    /* copy data into buffer */
    char* ptr = sendbuf;
    uint64_t idx;
    for (idx = 0; idx < my_count; idx++) {
        /* encode the filename first */
        const char* name = mfu_flist_file_get_name(list, idx);
        strcpy(ptr, name);
        ptr += chars;

        /* last character encodes item type */
        mfu_filetype type = mfu_flist_file_get_type(list, idx);
        if (type == MFU_TYPE_DIR) {
            ptr[0] = 'd';
        } else if (type == MFU_TYPE_FILE || type == MFU_TYPE_LINK) {
            ptr[0] = 'f';
        } else {
            ptr[0] = 'u';
        }
        ptr++;
    }

    /* sort items */
    void* recvbuf;
    int recvcount;
    DTCMP_Handle handle;
    DTCMP_Sortz(
        sendbuf, sendcount, &recvbuf, &recvcount,
        dt_key, dt_keysat, op_str, DTCMP_FLAG_NONE, MPI_COMM_WORLD, &handle
    );

    /* delete data */
    int delcount = 0;
    ptr = (char*)recvbuf;
    while (delcount < recvcount) {
        /* get item name */
        char* name = ptr;
        ptr += chars;

        /* get item type */
        char type = ptr[0];
        ptr++;

        /* delete item */
        remove_type(type, name);
        delcount++;
    }

    /* record number of items we deleted */
    *rmcount = (uint64_t) delcount;

    /* free output data */
    DTCMP_Free(&handle);

    /* free our send buffer */
    mfu_free(&sendbuf);

    /* free key comparison operation */
    DTCMP_Op_free(&op_str);

    /* free datatypes */
    MPI_Type_free(&dt_keysat);
    MPI_Type_free(&dt_key);

    return;
}
/* for given depth, evenly spread the files among processes for
 * improved load balancing
 *
 * flist   - items to remove
 * rmcount - set to the number of items this process passed to
 *           remove_type()
 *
 * The global list is divided into contiguous ranges of nearly equal size,
 * one per rank.  Each process packs its items as "<type code><name>\0"
 * strings, exchanges them with MPI_Alltoallv so every rank receives its
 * range, and removes what it receives. */
static void remove_spread(mfu_flist flist, uint64_t* rmcount)
{
    uint64_t idx;

    /* initialize our remove count */
    *rmcount = 0;

    /* get our rank and number of ranks in job */
    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* allocate memory for alltoall exchanges */
    size_t bufsize = (size_t)ranks * sizeof(int);
    int* sendcounts = (int*) MFU_MALLOC(bufsize);
    int* sendsizes  = (int*) MFU_MALLOC(bufsize);
    int* senddisps  = (int*) MFU_MALLOC(bufsize);
    int* recvsizes  = (int*) MFU_MALLOC(bufsize);
    int* recvdisps  = (int*) MFU_MALLOC(bufsize);

    /* get number of items */
    uint64_t my_count  = mfu_flist_size(flist);
    uint64_t all_count = mfu_flist_global_size(flist);
    uint64_t offset    = mfu_flist_global_offset(flist);

    /* compute number of bytes we'll send:
     * type code + name + terminating NUL for each item */
    size_t sendbytes = 0;
    for (idx = 0; idx < my_count; idx++) {
        const char* name = mfu_flist_file_get_name(flist, idx);
        size_t len = strlen(name) + 2;
        sendbytes += len;
    }

    /* compute the number of items that each rank should have */
    uint64_t low = all_count / (uint64_t)ranks;
    uint64_t extra = all_count - low * (uint64_t)ranks;

    /* compute number that we'll send to each rank and initialize sendsizes and offsets */
    uint64_t i;
    for (i = 0; i < (uint64_t)ranks; i++) {
        /* compute starting element id and count for given rank */
        uint64_t start, num;
        if (i < extra) {
            num = low + 1;
            start = i * num;
        } else {
            num = low;
            start = (i - extra) * num + extra * (low + 1);
        }

        /* compute the number of items we'll send to this task */
        uint64_t sendcnt = 0;
        if (my_count > 0) {
            if (start <= offset && offset < start + num) {
                /* this rank overlaps our range,
                 * and its first element comes at or before our first element */
                sendcnt = num - (offset - start);
                if (my_count < sendcnt) {
                    /* the number the rank could receive from us
                     * is more than we have left */
                    sendcnt = my_count;
                }
            } else if (offset < start && start < offset + my_count) {
                /* this rank overlaps our range,
                 * and our first element comes strictly before its first element */
                sendcnt = my_count - (start - offset);
                if (num < sendcnt) {
                    /* the number the rank can receive from us
                     * is less than we have left */
                    sendcnt = num;
                }
            }
        }

        /* record the number of items we'll send to this task */
        sendcounts[i] = (int) sendcnt;

        /* set sizes and displacements to 0, we'll fix this later */
        sendsizes[i] = 0;
        senddisps[i] = 0;
    }

    /* allocate space */
    char* sendbuf = (char*) MFU_MALLOC(sendbytes);

    /* copy data into buffer */
    int dest = -1;
    int disp = 0;
    for (idx = 0; idx < my_count; idx++) {
        /* get name and type of item */
        const char* name = mfu_flist_file_get_name(flist, idx);
        mfu_filetype type = mfu_flist_file_get_type(flist, idx);

        /* get rank that we're packing data for */
        if (dest == -1) {
            dest = get_first_nonzero(sendcounts, ranks);
            if (dest == -1) {
                /* the send counts do not cover all of our items; stop here
                 * rather than index the arrays below with -1 */
                MFU_LOG(MFU_LOG_ERR, "Failed to find destination rank for item `%s'", name);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }

            /* about to copy first item for this rank,
             * record its displacement */
            senddisps[dest] = disp;
        }

        /* identify region to be sent to rank */
        char* path = sendbuf + disp;

        /* first character encodes item type */
        if (type == MFU_TYPE_DIR) {
            path[0] = 'd';
        } else if (type == MFU_TYPE_FILE || type == MFU_TYPE_LINK) {
            path[0] = 'f';
        } else {
            path[0] = 'u';
        }

        /* now copy in the path */
        strcpy(&path[1], name);

        /* TODO: check that we don't overflow the int */
        /* add bytes to sendsizes and increase displacement */
        size_t count = strlen(name) + 2;
        sendsizes[dest] += (int) count;
        disp += (int) count;

        /* decrement the count for this rank */
        sendcounts[dest]--;
        if (sendcounts[dest] == 0) {
            dest = -1;
        }
    }

    /* compute displacements */
    senddisps[0] = 0;
    for (i = 1; i < (uint64_t)ranks; i++) {
        senddisps[i] = senddisps[i - 1] + sendsizes[i - 1];
    }

    /* alltoall to specify incoming counts */
    MPI_Alltoall(sendsizes, 1, MPI_INT, recvsizes, 1, MPI_INT, MPI_COMM_WORLD);

    /* compute size of recvbuf and displacements */
    size_t recvbytes = 0;
    recvdisps[0] = 0;
    for (i = 0; i < (uint64_t)ranks; i++) {
        recvbytes += (size_t) recvsizes[i];
        if (i > 0) {
            recvdisps[i] = recvdisps[i - 1] + recvsizes[i - 1];
        }
    }

    /* allocate recvbuf */
    char* recvbuf = (char*) MFU_MALLOC(recvbytes);

    /* alltoallv to send data */
    MPI_Alltoallv(
        sendbuf, sendsizes, senddisps, MPI_CHAR,
        recvbuf, recvsizes, recvdisps, MPI_CHAR, MPI_COMM_WORLD
    );

    /* delete data */
    char* item = recvbuf;
    while (item < recvbuf + recvbytes) {
        /* get item name and type */
        char type = item[0];
        char* name = &item[1];

        /* delete item */
        remove_type(type, name);

        /* keep tally of number of items we deleted; the parentheses are
         * required, *rmcount++ would advance the pointer instead */
        (*rmcount)++;

        /* go to next item */
        size_t item_size = strlen(item) + 1;
        item += item_size;
    }

    /* free memory */
    mfu_free(&recvbuf);
    mfu_free(&recvdisps);
    mfu_free(&recvsizes);
    mfu_free(&sendbuf);
    mfu_free(&senddisps);
    mfu_free(&sendsizes);
    mfu_free(&sendcounts);

    return;
}
/* given an input flist, return a newly allocated flist consisting of
 * a filtered set by finding all items that match/don't match a given
 * regular expression
 *
 * flist     - input list
 * regex_exp - POSIX basic regular expression; when NULL no filtering is
 *             done and the returned subset list is left empty and
 *             unsummarized
 * exclude   - non-zero: keep items that do NOT match;
 *             zero: keep items that match
 * name      - non-zero: match against the basename of each item;
 *             zero: match against the full path
 *
 * Calls MFU_ABORT if the expression cannot be compiled.  Returns the new
 * list. */
mfu_flist mfu_flist_filter_regex(mfu_flist flist, const char* regex_exp, int exclude, int name)
{
    /* create our list to return */
    mfu_flist dest = mfu_flist_subset(flist);

    /* check if user passed in an expression, if so then filter the list */
    if (regex_exp != NULL) {
        /* compile regular expression, if it fails print error */
        regex_t regex;
        int regex_return = regcomp(&regex, regex_exp, 0);
        if (regex_return) {
            MFU_ABORT(-1, "Could not compile regex: `%s' rc=%d\n", regex_exp, regex_return);
        }

        /* copy the things that don't or do (based on input) match the regex into a
         * filtered list */
        uint64_t idx = 0;
        uint64_t size = mfu_flist_size(flist);
        while (idx < size) {
            /* get full path of item */
            const char* file_name = mfu_flist_file_get_name(flist, idx);

            /* get basename of item (exclude the path) */
            mfu_path* pathname = mfu_path_from_str(file_name);
            mfu_path_basename(pathname);
            char* base = mfu_path_strdup(pathname);

            /* execute regex on item, either against the basename or
             * the full path depending on name flag */
            if (name) {
                /* run regex on basename */
                regex_return = regexec(&regex, base, 0, NULL, 0);
            }
            else {
                /* run regex on full path */
                regex_return = regexec(&regex, file_name, 0, NULL, 0);
            }

            /* copy item to the filtered list */
            if (exclude) {
                /* user wants to exclude items that match, so copy everything that
                 * does not match */
                if (regex_return == REG_NOMATCH) {
                    mfu_flist_file_copy(flist, idx, dest);
                }
            }
            else {
                /* user wants to copy over any matching items */
                if (regex_return == 0) {
                    mfu_flist_file_copy(flist, idx, dest);
                }
            }

            /* free the basename */
            mfu_free(&base);
            mfu_path_delete(&pathname);

            /* get next item in our list */
            idx++;
        }

        /* release the memory held by the compiled expression */
        regfree(&regex);

        /* summarize the filtered list */
        mfu_flist_summarize(dest);
    }

    /* return the filtered list */
    return dest;
}
/* Predicate that prints the name of item `idx` of `flist` on its own line
 * on stdout.  `arg` is not used.  Always returns 1. */
int MFU_PRED_PRINT (mfu_flist flist, uint64_t idx, void* arg)
{
    /* emit the item's path followed by a newline */
    printf("%s\n", mfu_flist_file_get_name(flist, idx));

    return 1;
}
/* removes list of items one level at a time starting from the deepest
 *
 * The list is split by directory depth with mfu_flist_array_by_depth();
 * levels are then processed from deepest to shallowest, with a barrier
 * after each level so that a level is finished on every process before
 * the next one starts.  Items at each level are removed with
 * remove_spread().
 *
 * The pass that would set the write bit on directories from top to bottom
 * is compiled out (#if 0) and does not run.
 *
 * When mfu_debug_level is at least MFU_LOG_VERBOSE, rank 0 prints
 * per-level statistics and a final summary to stdout. */
void mfu_flist_unlink(mfu_flist flist)
{
    int level;

    /* wait for all tasks and start timer */
    MPI_Barrier(MPI_COMM_WORLD);
    double start_remove = MPI_Wtime();

    /* split files into separate lists by directory depth */
    int levels, minlevel;
    mfu_flist* lists;
    mfu_flist_array_by_depth(flist, &levels, &minlevel, &lists);

#if 0
    /* dive from shallow to deep, ensure all directories have write bit set */
    for (level = 0; level < levels; level++) {
        /* get list of items for this level */
        mfu_flist list = lists[level];

        /* determine whether we have details at this level */
        int detail = mfu_flist_have_detail(list);

        /* iterate over items and set write bit on directories if needed */
        uint64_t idx;
        uint64_t size = mfu_flist_size(list);
        for (idx = 0; idx < size; idx++) {
            /* check whether we have a directory */
            mfu_filetype type = mfu_flist_file_get_type(list, idx);
            if (type == MFU_TYPE_DIR) {
                /* assume we have to set the bit */
                int set_write_bit = 1;
                if (detail) {
                    mode_t mode = (mode_t) mfu_flist_file_get_mode(list, idx);
                    if (mode & S_IWUSR) {
                        /* we have the mode of the file, and the bit is already set */
                        set_write_bit = 0;
                    }
                }

                /* set the bit if needed */
                if (set_write_bit) {
                    const char* name = mfu_flist_file_get_name(list, idx);
                    int rc = chmod(name, S_IRWXU);
                    if (rc != 0) {
                        MFU_LOG(MFU_LOG_ERR, "Failed to chmod directory `%s' (errno=%d %s)",
                                  name, errno, strerror(errno)
                                 );
                    }
                }
            }
        }

        /* wait for all procs to finish before we start next level */
        MPI_Barrier(MPI_COMM_WORLD);
    }
#endif

    /* now remove files starting from deepest level */
    for (level = levels - 1; level >= 0; level--) {
        double start = MPI_Wtime();

        /* get list of items for this level */
        mfu_flist list = lists[level];

        uint64_t count = 0;
        //remove_direct(list, &count);
        remove_spread(list, &count);
        // remove_map(list, &count);
        // remove_sort(list, &count);
        // remove_libcircle(list, &count);
        // TODO: remove sort w/ spread

        /* wait for all procs to finish before we start
         * with files at next level */
        MPI_Barrier(MPI_COMM_WORLD);

        double end = MPI_Wtime();

        if (mfu_debug_level >= MFU_LOG_VERBOSE) {
            uint64_t min, max, sum;
            MPI_Allreduce(&count, &min, 1, MPI_UINT64_T, MPI_MIN, MPI_COMM_WORLD);
            MPI_Allreduce(&count, &max, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD);
            MPI_Allreduce(&count, &sum, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
            double rate = 0.0;
            if (end - start > 0.0) {
                rate = (double)sum / (end - start);
            }
            double time_diff = end - start;
            if (mfu_rank == 0) {
                printf("level=%d min=%lu max=%lu sum=%lu rate=%f secs=%f\n",
                       (minlevel + level), (unsigned long)min, (unsigned long)max, (unsigned long)sum, rate, time_diff
                      );
                fflush(stdout);
            }
        }
    }

    mfu_flist_array_free(levels, &lists);

    /* wait for all tasks and stop timer */
    MPI_Barrier(MPI_COMM_WORLD);
    double end_remove = MPI_Wtime();

    /* report remove count, time, and rate */
    if (mfu_debug_level >= MFU_LOG_VERBOSE && mfu_rank == 0) {
        uint64_t all_count = mfu_flist_global_size(flist);
        double time_diff = end_remove - start_remove;
        double rate = 0.0;
        if (time_diff > 0.0) {
            rate = ((double)all_count) / time_diff;
        }
        /* cast so the argument matches %lu, as in the per-level report */
        printf("Removed %lu items in %f seconds (%f items/sec)\n",
               (unsigned long)all_count, time_diff, rate
              );
    }

    return;
}
int main(int argc, char** argv) { uint64_t i; int status; uint64_t file_size; uint64_t chunk_size = DDUP_CHUNK_SIZE; SHA256_CTX* ctx_ptr; MPI_Init(NULL, NULL); mfu_init(); int rank, ranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* pointer to mfu_walk_opts */ mfu_walk_opts_t* walk_opts = mfu_walk_opts_new(); mfu_debug_level = MFU_LOG_VERBOSE; static struct option long_options[] = { {"debug", 0, 0, 'd'}, {"verbose", 0, 0, 'v'}, {"quiet", 0, 0, 'q'}, {"help", 0, 0, 'h'}, {0, 0, 0, 0} }; /* Parse options */ int usage = 0; int help = 0; int c; int option_index = 0; while ((c = getopt_long(argc, argv, "d:vqh", \ long_options, &option_index)) != -1) { switch (c) { case 'd': if (strncmp(optarg, "fatal", 5) == 0) { mfu_debug_level = MFU_LOG_FATAL; if (rank == 0) MFU_LOG(MFU_LOG_INFO, "Debug level set to: fatal"); } else if (strncmp(optarg, "err", 3) == 0) { mfu_debug_level = MFU_LOG_ERR; if (rank == 0) MFU_LOG(MFU_LOG_INFO, "Debug level set to: " "errors"); } else if (strncmp(optarg, "warn", 4) == 0) { mfu_debug_level = MFU_LOG_WARN; if (rank == 0) MFU_LOG(MFU_LOG_INFO, "Debug level set to: " "warnings"); } else if (strncmp(optarg, "info", 4) == 0) { mfu_debug_level = MFU_LOG_INFO; if (rank == 0) MFU_LOG(MFU_LOG_INFO, "Debug level set to: info"); } else if (strncmp(optarg, "dbg", 3) == 0) { mfu_debug_level = MFU_LOG_DBG; if (rank == 0) MFU_LOG(MFU_LOG_INFO, "Debug level set to: debug"); } else { if (rank == 0) MFU_LOG(MFU_LOG_INFO, "Debug level `%s' not " "recognized. 
Defaulting to " "`info'.", optarg); } case 'h': usage = 1; help = 1; case 'v': mfu_debug_level = MFU_LOG_VERBOSE; break; case 'q': mfu_debug_level = MFU_LOG_NONE; break; case '?': usage = 1; help = 1; break; default: usage = 1; break; } } /* check that user gave us one and only one directory */ int numargs = argc - optind; if (numargs != 1) { /* missing the directory, so post a message, and print usage */ if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "You must specify a directory path"); } usage = 1; } /* print usage and bail if needed */ if (usage) { if (rank == 0) { print_usage(); } /* set error code base on whether user requested usage or not */ if (help) { status = 0; } else { status = -1; } MPI_Barrier(MPI_COMM_WORLD); goto out; } /* get the directory name */ const char* dir = argv[optind]; /* create MPI datatypes */ MPI_Datatype key; MPI_Datatype keysat; mpi_type_init(&key, &keysat); /* create DTCMP comparison operation */ DTCMP_Op cmp; mtcmp_cmp_init(&cmp); /* allocate buffer to read data from file */ char* chunk_buf = (char*)MFU_MALLOC(DDUP_CHUNK_SIZE); /* allocate a file list */ mfu_flist flist = mfu_flist_new(); /* Walk the path(s) to build the flist */ mfu_flist_walk_path(dir, walk_opts, flist); /* TODO: spread list among procs? 
*/ /* get local number of items in flist */ uint64_t checking_files = mfu_flist_size(flist); /* allocate memory to hold SHA256 context values */ struct file_item* file_items = (struct file_item*) MFU_MALLOC(checking_files * sizeof(*file_items)); /* Allocate two lists of length size, where each * element has (DDUP_KEY_SIZE + 1) uint64_t values * (id, checksum, index) */ size_t list_bytes = checking_files * (DDUP_KEY_SIZE + 1) * sizeof(uint64_t); uint64_t* list = (uint64_t*) MFU_MALLOC(list_bytes); uint64_t* new_list = (uint64_t*) MFU_MALLOC(list_bytes); /* Initialize the list */ uint64_t* ptr = list; uint64_t new_checking_files = 0; for (i = 0; i < checking_files; i++) { /* check that item is a regular file */ mode_t mode = (mode_t) mfu_flist_file_get_mode(flist, i); if (! S_ISREG(mode)) { continue; } /* get the file size */ file_size = mfu_flist_file_get_size(flist, i); if (file_size == 0) { /* Files with size zero are not interesting at all */ continue; } /* for first pass, group all files with same file size */ ptr[0] = file_size; /* we'll leave the middle part of the key unset */ /* record our index in flist */ ptr[DDUP_KEY_SIZE] = i; /* initialize the SHA256 hash state for this file */ SHA256_Init(&file_items[i].ctx); /* increment our file count */ new_checking_files++; /* advance to next spot in the list */ ptr += DDUP_KEY_SIZE + 1; } /* reduce our list count based on any files filtered out above */ checking_files = new_checking_files; /* allocate arrays to hold result from DTCMP_Rankv call to * assign group and rank values to each item */ uint64_t output_bytes = checking_files * sizeof(uint64_t); uint64_t* group_id = (uint64_t*) MFU_MALLOC(output_bytes); uint64_t* group_ranks = (uint64_t*) MFU_MALLOC(output_bytes); uint64_t* group_rank = (uint64_t*) MFU_MALLOC(output_bytes); /* get total number of items across all tasks */ uint64_t sum_checking_files; MPI_Allreduce(&checking_files, &sum_checking_files, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); uint64_t 
chunk_id = 0; while (sum_checking_files > 1) { /* update the chunk id we'll read from all files */ chunk_id++; /* iterate over our list and compute SHA256 value for each */ ptr = list; for (i = 0; i < checking_files; i++) { /* get the flist index for this item */ uint64_t idx = ptr[DDUP_KEY_SIZE]; /* look up file name */ const char* fname = mfu_flist_file_get_name(flist, idx); /* look up file size */ file_size = mfu_flist_file_get_size(flist, idx); /* read a chunk of data from the file into chunk_buf */ uint64_t data_size; status = read_data(fname, chunk_buf, chunk_id, chunk_size, file_size, &data_size); if (status) { /* File size has been changed, TODO: handle */ printf("failed to read file %s, maybe file " "size has been modified during the " "process", fname); } /* update the SHA256 context for this file */ ctx_ptr = &file_items[idx].ctx; SHA256_Update(ctx_ptr, chunk_buf, data_size); /* * Use SHA256 value as key. * This is actually an hack, but SHA256_Final can't * be called multiple times with out changing ctx */ SHA256_CTX ctx_tmp; memcpy(&ctx_tmp, ctx_ptr, sizeof(ctx_tmp)); SHA256_Final((unsigned char*)(ptr + 1), &ctx_tmp); /* move on to next file in the list */ ptr += DDUP_KEY_SIZE + 1; } /* Assign group ids and compute group sizes */ uint64_t groups; DTCMP_Rankv( (int)checking_files, list, &groups, group_id, group_ranks, group_rank, key, keysat, cmp, DTCMP_FLAG_NONE, MPI_COMM_WORLD ); /* any files assigned to a group of size 1 is unique, * any files in groups sizes > 1 for which we've read * all bytes are the same, and filter all other files * into a new list for another iteration */ new_checking_files = 0; ptr = list; uint64_t* new_ptr = new_list; for (i = 0; i < checking_files; i++) { /* Get index into flist for this item */ uint64_t idx = ptr[DDUP_KEY_SIZE]; /* look up file name */ const char* fname = mfu_flist_file_get_name(flist, idx); /* look up file size */ file_size = mfu_flist_file_get_size(flist, idx); /* get a pointer to the SHA256 context for 
this file */ ctx_ptr = &file_items[idx].ctx; if (group_ranks[i] == 1) { /* * Only one file in this group, * mfu_flist_file_name(flist, idx) is unique */ } else if (file_size <= (chunk_id * chunk_size)) { /* * We've run out of bytes to checksum, and we * still have a group size > 1 * mfu_flist_file_name(flist, idx) is a * duplicate with other files that also have * matching group_id[i] */ unsigned char digest[SHA256_DIGEST_LENGTH]; SHA256_Final(digest, ctx_ptr); char digest_string[SHA256_DIGEST_LENGTH * 2 + 1]; dump_sha256_digest(digest_string, digest); printf("%s %s\n", fname, digest_string); } else { /* Have multiple files with the same checksum, * but still have bytes left to read, so keep * this file */ /* use new group ID to segregate files, * this id will be unique for all files of the * same size and having the same hash up to * this point */ new_ptr[0] = group_id[i]; /* Copy over flist index into new list entry */ new_ptr[DDUP_KEY_SIZE] = idx; /* got one more in the new list */ new_checking_files++; /* move on to next item in new list */ new_ptr += DDUP_KEY_SIZE + 1; MFU_LOG(MFU_LOG_DBG, "checking file " "\"%s\" for chunk index %d of size %" PRIu64"\n", fname, (int)chunk_id, chunk_size); } /* move on to next file in the list */ ptr += DDUP_KEY_SIZE + 1; } /* Swap lists */ uint64_t* tmp_list; tmp_list = list; list = new_list; new_list = tmp_list; /* Update size of current list */ checking_files = new_checking_files; /* Get new global list size */ MPI_Allreduce(&checking_files, &sum_checking_files, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); } /* free the walk options */ mfu_walk_opts_delete(&walk_opts); mfu_free(&group_rank); mfu_free(&group_ranks); mfu_free(&group_id); mfu_free(&new_list); mfu_free(&list); mfu_free(&file_items); mfu_free(&chunk_buf); mfu_flist_free(&flist); mtcmp_cmp_fini(&cmp); mpi_type_fini(&key, &keysat); status = 0; out: mfu_finalize(); MPI_Finalize(); return status; }
/* Sorts the file list *pflist across all ranks of MPI_COMM_WORLD and
 * replaces it with the sorted list.
 *
 * sortfields is a comma-separated list of sort keys.  The keys
 * recognized here are "name" (ascending by path) and "-name"
 * (descending by path).  Unrecognized keys are reported on rank 0 and
 * skipped.  At most MAXFIELDS keys are used; extra keys are reported
 * with a warning and ignored.
 *
 * Each item is encoded as a fixed-size record: the key fields (the path,
 * in a slot of mfu_flist_file_max_name() bytes) followed by the packed
 * flist element as satellite data.  DTCMP_Sortz sorts the records and
 * the satellite part is unpacked into a new list created with
 * mfu_flist_subset().
 *
 * On return *pflist refers to the new list and the original list has
 * been freed.  Always returns MFU_SUCCESS; failures to build the DTCMP
 * types/ops or to sort terminate via MFU_ABORT.
 *
 * NOTE(review): if no key in sortfields is valid, nfields is 0 when the
 * DTCMP series type/op are created -- presumably callers validate
 * sortfields beforehand; confirm. */
static int sort_files_readdir(const char* sortfields, mfu_flist* pflist)
{
    /* capacity of the per-field arrays below; an enum constant keeps
     * them ordinary fixed-size arrays rather than VLAs */
    enum { MAXFIELDS = 1 };

    /* get list from caller */
    mfu_flist flist = *pflist;

    /* create a new list as subset of original list */
    mfu_flist flist2 = mfu_flist_subset(flist);

    uint64_t incount = mfu_flist_size(flist);
    uint64_t chars   = mfu_flist_file_max_name(flist);

    /* create datatype for packed file list element */
    MPI_Datatype dt_sat;
    size_t bytes = mfu_flist_file_pack_size(flist);
    MPI_Type_contiguous((int)bytes, MPI_BYTE, &dt_sat);

    /* get our rank, only used to limit messages to rank 0 */
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* build type for file path */
    MPI_Datatype dt_filepath;
    MPI_Type_contiguous((int)chars, MPI_CHAR, &dt_filepath);
    MPI_Type_commit(&dt_filepath);

    /* build comparison op for filenames */
    DTCMP_Op op_filepath;
    if (DTCMP_Op_create(dt_filepath, my_strcmp, &op_filepath) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create sorting operation for filepath");
    }

    /* build reverse comparison op for filenames */
    DTCMP_Op op_filepath_rev;
    if (DTCMP_Op_create(dt_filepath, my_strcmp_rev, &op_filepath_rev) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create reverse sorting operation for filepath");
    }

    /* TODO: process sort fields */
    MPI_Datatype types[MAXFIELDS];
    DTCMP_Op     ops[MAXFIELDS];
    sort_field   fields[MAXFIELDS];
    size_t       lengths[MAXFIELDS];

    int f;
    for (f = 0; f < MAXFIELDS; f++) {
        types[f] = MPI_DATATYPE_NULL;
        ops[f]   = DTCMP_OP_NULL;
    }

    /* parse the comma-separated sort keys; strtok modifies its input,
     * so work on a private copy */
    int nfields = 0;
    char* sortfields_copy = MFU_STRDUP(sortfields);
    char* token = strtok(sortfields_copy, ",");
    while (token != NULL) {
        int valid = 1;
        DTCMP_Op token_op = DTCMP_OP_NULL;

        if (strcmp(token, "name") == 0) {
            token_op = op_filepath;
        }
        else if (strcmp(token, "-name") == 0) {
            token_op = op_filepath_rev;
        }
        else {
            /* invalid token */
            valid = 0;
            if (rank == 0) {
                MFU_LOG(MFU_LOG_ERR, "Invalid sort field: %s\n", token);
            }
        }

        if (valid) {
            /* check capacity BEFORE storing: checking only after the
             * store let a second valid key write past the end of the
             * arrays */
            if (nfields >= MAXFIELDS) {
                if (rank == 0) {
                    MFU_LOG(MFU_LOG_WARN,
                            "Too many sort fields (max %d), ignoring `%s' and any that follow",
                            (int)MAXFIELDS, token);
                }
                break;
            }

            types[nfields]   = dt_filepath;
            ops[nfields]     = token_op;
            fields[nfields]  = FILENAME;
            lengths[nfields] = chars;
            nfields++;
        }

        token = strtok(NULL, ",");
    }
    mfu_free(&sortfields_copy);

    /* build key type */
    MPI_Datatype dt_key;
    if (DTCMP_Type_create_series(nfields, types, &dt_key) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create type for key");
    }

    /* create sort op */
    DTCMP_Op op_key;
    if (DTCMP_Op_create_series(nfields, ops, &op_key) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create sorting operation for key");
    }

    /* build keysat type: key followed by packed flist element */
    MPI_Datatype dt_keysat, keysat_types[2];
    keysat_types[0] = dt_key;
    keysat_types[1] = dt_sat;
    if (DTCMP_Type_create_series(2, keysat_types, &dt_keysat) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create type for keysat");
    }

    /* get extent of key type */
    MPI_Aint key_lb, key_extent;
    MPI_Type_get_extent(dt_key, &key_lb, &key_extent);

    /* get extent of keysat type */
    MPI_Aint keysat_lb, keysat_extent;
    MPI_Type_get_extent(dt_keysat, &keysat_lb, &keysat_extent);

    /* compute size of sort element and allocate buffer */
    size_t sortbufsize = (size_t)keysat_extent * incount;
    void* sortbuf = MFU_MALLOC(sortbufsize);

    /* copy data into sort elements */
    uint64_t idx = 0;
    char* sortptr = (char*) sortbuf;
    while (idx < incount) {
        /* copy in the key fields, each in its fixed-width slot */
        int i;
        for (i = 0; i < nfields; i++) {
            if (fields[i] == FILENAME) {
                const char* name = mfu_flist_file_get_name(flist, idx);
                strcpy(sortptr, name);
            }
            sortptr += lengths[i];
        }

        /* pack file element */
        sortptr += mfu_flist_file_pack(sortptr, flist, idx);

        idx++;
    }

    /* sort data */
    void* outsortbuf;
    int outsortcount;
    DTCMP_Handle handle;
    int sort_rc = DTCMP_Sortz(
                      sortbuf, (int)incount, &outsortbuf, &outsortcount,
                      dt_key, dt_keysat, op_key, DTCMP_FLAG_NONE,
                      MPI_COMM_WORLD, &handle
                  );
    if (sort_rc != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to sort data");
    }

    /* step through sorted records: skip the key, then unpack the
     * satellite element into the new list */
    idx = 0;
    sortptr = (char*) outsortbuf;
    while (idx < (uint64_t)outsortcount) {
        sortptr += key_extent;
        sortptr += mfu_flist_file_unpack(sortptr, flist2);
        idx++;
    }

    /* build summary of new list */
    mfu_flist_summarize(flist2);

    /* free memory held by the sort result */
    DTCMP_Free(&handle);

    /* free ops */
    DTCMP_Op_free(&op_key);
    DTCMP_Op_free(&op_filepath_rev);
    DTCMP_Op_free(&op_filepath);

    /* free types */
    MPI_Type_free(&dt_keysat);
    MPI_Type_free(&dt_key);
    MPI_Type_free(&dt_filepath);

    /* free input buffer holding sort elements */
    mfu_free(&sortbuf);

    /* free the satellite type */
    MPI_Type_free(&dt_sat);

    /* return new list and free old one */
    *pflist = flist2;
    mfu_flist_free(&flist);

    return MFU_SUCCESS;
}
/* Copies all extended attributes from the item at index idx of flist to
 * dest_path.
 *
 * The source path is looked up with mfu_flist_file_get_name().  The
 * l*xattr variants are used for listing, reading and writing.
 *
 * Both the name list and each value are first read into a 1024-byte
 * buffer.  On ERANGE the buffer is released, the call is repeated with
 * size 0 to learn the required size, and a buffer of that size is
 * allocated before trying again.
 *
 * Errors are logged and otherwise ignored (best effort):
 *   - ENOTSUP from llistxattr is silently ignored
 *   - ENOATTR from lgetxattr (attribute vanished) skips that attribute
 *   - a failed lsetxattr is logged and the remaining attributes are
 *     still attempted
 *
 * Compiles to a no-op unless DCOPY_USE_XATTRS is set. */
void DCOPY_copy_xattrs(
    mfu_flist flist,
    uint64_t idx,
    const char* dest_path)
{
#if DCOPY_USE_XATTRS
    /* get source file name */
    const char* src_path = mfu_flist_file_get_name(flist, idx);

    /* start with a reasonable buffer, we'll allocate more as needed */
    size_t list_bufsize = 1024;
    char* list = (char*) MFU_MALLOC(list_bufsize);

    /* fetch the NUL-separated list of attribute names */
    ssize_t list_size = 0;
    int got_list = 0;
    while (! got_list) {
        list_size = llistxattr(src_path, list, list_bufsize);
        if (list_size < 0) {
            if (errno == ERANGE) {
                /* buffer is too small, free our current buffer
                 * and call it again with size==0 to get new size */
                mfu_free(&list);
                list_bufsize = 0;
            }
            else if (errno == ENOTSUP) {
                /* this is common enough that we silently ignore it */
                break;
            }
            else {
                /* this is a real error */
                MFU_LOG(MFU_LOG_ERR, "Failed to get list of extended attributes on %s llistxattr() errno=%d %s",
                    src_path, errno, strerror(errno)
                   );
                break;
            }
        }
        else if (list_size > 0 && list_bufsize == 0) {
            /* called llistxattr with size==0 and got back positive
             * number indicating size of buffer we need to allocate */
            list_bufsize = (size_t) list_size;
            list = (char*) MFU_MALLOC(list_bufsize);
        }
        else {
            /* got our list, its size is in list_size, which may be 0 */
            got_list = 1;
        }
    }

    /* iterate over list and copy values to new object lgetxattr/lsetxattr */
    if (got_list) {
        char* name = list;
        while (name < list + list_size) {
            /* start with a reasonable buffer,
             * allocate something bigger as needed */
            size_t val_bufsize = 1024;
            void* val = (void*) MFU_MALLOC(val_bufsize);

            /* lookup value for name */
            ssize_t val_size = 0;
            int got_val = 0;
            while (! got_val) {
                val_size = lgetxattr(src_path, name, val, val_bufsize);
                if (val_size < 0) {
                    if (errno == ERANGE) {
                        /* buffer is too small, free our current buffer
                         * and call it again with size==0 to get new size */
                        mfu_free(&val);
                        val_bufsize = 0;
                    }
                    else if (errno == ENOATTR) {
                        /* source object no longer has this attribute,
                         * maybe deleted out from under us */
                        break;
                    }
                    else {
                        /* this is a real error */
                        MFU_LOG(MFU_LOG_ERR, "Failed to get value for name=%s on %s lgetxattr() errno=%d %s",
                            name, src_path, errno, strerror(errno)
                           );
                        break;
                    }
                }
                else if (val_size > 0 && val_bufsize == 0) {
                    /* called lgetxattr with size==0 and got back positive
                     * number indicating size of buffer we need to allocate */
                    val_bufsize = (size_t) val_size;
                    val = (void*) MFU_MALLOC(val_bufsize);
                }
                else {
                    /* got our value, its size is in val_size, which may be 0 */
                    got_val = 1;
                }
            }

            /* set attribute on destination object */
            if (got_val) {
                int setrc = lsetxattr(dest_path, name, val, (size_t) val_size, 0);
                if (setrc != 0) {
                    MFU_LOG(MFU_LOG_ERR, "Failed to set value for name=%s on %s lsetxattr() errno=%d %s",
                        name, dest_path, errno, strerror(errno)
                       );
                }
            }

            /* free value buffer */
            mfu_free(&val);

            /* jump to next name, skipping its terminating NUL */
            size_t namelen = strlen(name) + 1;
            name += namelen;
        }
    }

    /* free space allocated for list */
    mfu_free(&list);
#endif /* DCOPY_USE_XATTRS */
}