int main(int argc, char** argv) { /* initialize MPI */ MPI_Init(&argc, &argv); mfu_init(); /* get our rank and the size of comm_world */ int rank, ranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* pointer to mfu_walk_opts */ mfu_walk_opts_t* walk_opts = mfu_walk_opts_new(); /* parse command line options */ char* inputname = NULL; char* ownername = NULL; char* groupname = NULL; char* modestr = NULL; char* regex_exp = NULL; mfu_perms* head = NULL; int walk = 0; int exclude = 0; int name = 0; /* verbose by default */ mfu_debug_level = MFU_LOG_VERBOSE; int option_index = 0; static struct option long_options[] = { {"input", 1, 0, 'i'}, {"owner", 1, 0, 'u'}, {"group", 1, 0, 'g'}, {"mode", 1, 0, 'm'}, {"exclude", 1, 0, 'e'}, {"match", 1, 0, 'a'}, {"name", 0, 0, 'n'}, {"progress", 1, 0, 'P'}, {"verbose", 0, 0, 'v'}, {"quiet", 0, 0, 'q'}, {"help", 0, 0, 'h'}, {0, 0, 0, 0} }; int usage = 0; while (1) { int c = getopt_long( argc, argv, "i:u:g:m:nvqh", long_options, &option_index ); if (c == -1) { break; } switch (c) { case 'i': inputname = MFU_STRDUP(optarg); break; case 'u': ownername = MFU_STRDUP(optarg); break; case 'g': groupname = MFU_STRDUP(optarg); break; case 'm': modestr = MFU_STRDUP(optarg); break; case 'e': regex_exp = MFU_STRDUP(optarg); exclude = 1; break; case 'a': regex_exp = MFU_STRDUP(optarg); exclude = 0; break; case 'n': name = 1; break; case 'P': mfu_progress_timeout = atoi(optarg); break; case 'v': mfu_debug_level = MFU_LOG_VERBOSE; break; case 'q': mfu_debug_level = MFU_LOG_NONE; break; case 'h': usage = 1; break; case '?': usage = 1; break; default: if (rank == 0) { printf("?? getopt returned character code 0%o ??\n", c); } } } /* check that we got a valid progress value */ if (mfu_progress_timeout < 0) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Seconds in --progress must be non-negative: %d invalid", mfu_progress_timeout); } usage = 1; } /* paths to walk come after the options */ int numpaths = 0; mfu_param_path* paths = NULL; if (optind < argc) { /* got a path to walk */ walk = 1; /* determine number of paths specified by user */ numpaths = argc - optind; /* allocate space for each path */ paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path)); /* process each path */ const char** argpaths = (const char**)(&argv[optind]); mfu_param_path_set_all(numpaths, argpaths, paths); /* advance to next set of options */ optind += numpaths; /* don't allow input file and walk */ if (inputname != NULL) { usage = 1; } } else { /* if we're not walking, we must be reading, * and for that we need a file */ if (inputname == NULL) { usage = 1; } } /* check that our mode string parses correctly */ if (modestr != NULL) { int valid = mfu_perms_parse(modestr, &head); if (! valid) { usage = 1; if (rank == 0) { printf("invalid mode string: %s\n", modestr); } /* free the head of the list */ mfu_perms_free(&head); } } /* print usage if we need to */ if (usage) { if (rank == 0) { print_usage(); } mfu_finalize(); MPI_Finalize(); return 1; } /* create an empty file list */ mfu_flist flist = mfu_flist_new(); /* flag used to check if permissions need to be * set on the walk */ if (head != NULL) { mfu_perms_need_dir_rx(head, walk_opts); } /* get our list of files, either by walking or reading an * input file */ if (walk) { /* if in octal mode set use_stat=0 to stat each file on walk */ if (head != NULL && head->octal && ownername == NULL && groupname == NULL) { walk_opts->use_stat = 0; } /* walk list of input paths */ mfu_flist_walk_param_paths(numpaths, paths, walk_opts, flist); } else { /* read list from file */ mfu_flist_read_cache(inputname, flist); } /* assume we'll use the full list */ mfu_flist srclist = flist; /* filter the list if needed */ mfu_flist filtered_flist = MFU_FLIST_NULL; if (regex_exp != NULL) { /* filter the list based on regex */ filtered_flist = mfu_flist_filter_regex(flist, regex_exp, exclude, name); /* update our source list to use the filtered list instead of the original */ srclist = filtered_flist; } /* change group and permissions */ mfu_flist_chmod(srclist, ownername, groupname, head); /* free list if it was used */ if (filtered_flist != MFU_FLIST_NULL){ /* free the filtered flist (if any) */ mfu_flist_free(&filtered_flist); } /* free the file list */ mfu_flist_free(&flist); /* free the path parameters */ mfu_param_path_free_all(numpaths, paths); /* free memory allocated to hold params */ mfu_free(&paths); /* free the owner and group names */ mfu_free(&ownername); mfu_free(&groupname); /* free the modestr */ mfu_free(&modestr); /* free the match_pattern if it isn't null */ if (regex_exp != NULL) { mfu_free(®ex_exp); } /* free the head of the list */ mfu_perms_free(&head); /* free the input file name */ mfu_free(&inputname); /* free the walk options */ mfu_walk_opts_delete(&walk_opts); /* shut down MPI */ mfu_finalize(); MPI_Finalize(); return 0; }
int main(int argc, char** argv) { int i; /* initialize MPI */ MPI_Init(&argc, &argv); mfu_init(); /* get our rank and the size of comm_world */ int rank, ranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* parse command line options */ char* inputname = NULL; char* regex_exp = NULL; int walk = 0; int exclude = 0; int name = 0; int dryrun = 0; int option_index = 0; static struct option long_options[] = { {"input", 1, 0, 'i'}, {"lite", 0, 0, 'l'}, {"exclude", 1, 0, 'e'}, {"match", 1, 0, 'a'}, {"name", 0, 0, 'n'}, {"help", 0, 0, 'h'}, {"dryrun", 0, 0, 'd'}, {"verbose", 0, 0, 'v'}, {0, 0, 0, 0} }; int usage = 0; while (1) { int c = getopt_long( argc, argv, "i:nlhv", long_options, &option_index ); if (c == -1) { break; } switch (c) { case 'i': inputname = MFU_STRDUP(optarg); break; case 'l': walk_stat = 0; break; case 'e': regex_exp = MFU_STRDUP(optarg); exclude = 1; break; case 'a': regex_exp = MFU_STRDUP(optarg); exclude = 0; break; case 'n': name = 1; break; case 'h': usage = 1; break; case 'd': dryrun = 1; break; case 'v': mfu_debug_level = MFU_LOG_VERBOSE; break; case '?': usage = 1; break; default: if (rank == 0) { printf("?? getopt returned character code 0%o ??\n", c); } } } /* paths to walk come after the options */ int numpaths = 0; mfu_param_path* paths = NULL; if (optind < argc) { /* got a path to walk */ walk = 1; /* determine number of paths specified by user */ numpaths = argc - optind; /* allocate space for each path */ paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path)); /* process each path */ char** argpaths = &argv[optind]; mfu_param_path_set_all(numpaths, argpaths, paths); /* advance to next set of options */ optind += numpaths; /* don't allow input file and walk */ if (inputname != NULL) { usage = 1; } } else { /* if we're not walking, we must be reading, * and for that we need a file */ if (inputname == NULL) { usage = 1; } } /* print usage if we need to */ if (usage) { if (rank == 0) { print_usage(); } mfu_finalize(); MPI_Finalize(); return 1; } /* create an empty file list */ mfu_flist flist = mfu_flist_new(); /* get our list of files, either by walking or reading an * input file */ if (walk) { /* walk list of input paths */ mfu_param_path_walk(numpaths, paths, walk_stat, flist, dir_perm); } else { /* read list from file */ mfu_flist_read_cache(inputname, flist); } /* assume we'll use the full list */ mfu_flist srclist = flist; /* filter the list if needed */ mfu_flist filtered_flist = MFU_FLIST_NULL; if (regex_exp != NULL) { /* filter the list based on regex */ filtered_flist = mfu_flist_filter_regex(flist, regex_exp, exclude, name); /* update our source list to use the filtered list instead of the original */ srclist = filtered_flist; } /* only actually delete files if the user wasn't doing a dry run */ if (dryrun) { /* just print what we would delete without actually doing anything, * this is useful if the user is trying to get a regex right */ mfu_flist_print(srclist); } else { /* remove files */ mfu_flist_unlink(srclist); } /* free list if it was used */ if (filtered_flist != MFU_FLIST_NULL){ /* free the filtered flist (if any) */ mfu_flist_free(&filtered_flist); } /* free the file list */ mfu_flist_free(&flist); /* free the path parameters */ mfu_param_path_free_all(numpaths, paths); /* free memory allocated to hold params */ mfu_free(&paths); /* free the regex string if we have one */ mfu_free(®ex_exp); /* free the input file name */ mfu_free(&inputname); /* shut down MPI */ mfu_finalize(); MPI_Finalize(); return 0; }
int main (int argc, char** argv) { /* initialize MPI */ MPI_Init(&argc, &argv); mfu_init(); /* get our rank and the size of comm_world */ int rank, ranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* pointer to mfu_walk_opts */ mfu_walk_opts_t* walk_opts = mfu_walk_opts_new(); /* capture current time for any time based queries, * to get a consistent value, capture and bcast from rank 0 */ mfu_pred_times* now_t = mfu_pred_now(); int ch; mfu_pred* pred_head = mfu_pred_new(); char* inputname = NULL; char* outputname = NULL; int walk = 0; int text = 0; static struct option long_options[] = { {"input", 1, 0, 'i'}, {"output", 1, 0, 'o'}, {"verbose", 0, 0, 'v'}, {"quiet", 0, 0, 'q'}, {"help", 0, 0, 'h'}, { "maxdepth", required_argument, NULL, 'd' }, { "amin", required_argument, NULL, 'a' }, { "anewer", required_argument, NULL, 'B' }, { "atime", required_argument, NULL, 'A' }, { "cmin", required_argument, NULL, 'c' }, { "cnewer", required_argument, NULL, 'D' }, { "ctime", required_argument, NULL, 'C' }, { "mmin", required_argument, NULL, 'm' }, { "newer", required_argument, NULL, 'N' }, { "mtime", required_argument, NULL, 'M' }, { "gid", required_argument, NULL, 'g' }, { "group", required_argument, NULL, 'G' }, { "uid", required_argument, NULL, 'u' }, { "user", required_argument, NULL, 'U' }, { "name", required_argument, NULL, 'n' }, { "path", required_argument, NULL, 'P' }, { "regex", required_argument, NULL, 'r' }, { "size", required_argument, NULL, 's' }, { "type", required_argument, NULL, 't' }, { "print", no_argument, NULL, 'p' }, { "exec", required_argument, NULL, 'e' }, { NULL, 0, NULL, 0 }, }; options.maxdepth = INT_MAX; int usage = 0; while (1) { int c = getopt_long( argc, argv, "i:o:vqh", long_options, NULL ); if (c == -1) { break; } int i; int space; char* buf; mfu_pred_times* t; mfu_pred_times_rel* tr; regex_t* r; int ret; /* verbose by default */ mfu_debug_level = MFU_LOG_VERBOSE; switch (c) { case 'e': space = 1024 * 1024; buf = (char *)MFU_MALLOC(space); for (i = optind-1; strcmp(";", argv[i]); i++) { if (i > argc) { if (rank == 0) { printf("%s: exec missing terminating ';'\n", argv[0]); } exit(1); } strncat(buf, argv[i], space); space -= strlen(argv[i]) + 1; /* save room for space or null */ if (space <= 0) { if (rank == 0) { printf("%s: exec argument list too long.\n", argv[0]); } mfu_free(&buf); continue; } strcat(buf, " "); optind++; } buf[strlen(buf)] = '\0'; /* clobbers trailing space */ mfu_pred_add(pred_head, MFU_PRED_EXEC, buf); break; case 'd': options.maxdepth = atoi(optarg); break; case 'g': /* TODO: error check argument */ buf = MFU_STRDUP(optarg); mfu_pred_add(pred_head, MFU_PRED_GID, (void *)buf); break; case 'G': buf = MFU_STRDUP(optarg); mfu_pred_add(pred_head, MFU_PRED_GROUP, (void *)buf); break; case 'u': /* TODO: error check argument */ buf = MFU_STRDUP(optarg); mfu_pred_add(pred_head, MFU_PRED_UID, (void *)buf); break; case 'U': buf = MFU_STRDUP(optarg); mfu_pred_add(pred_head, MFU_PRED_USER, (void *)buf); break; case 's': buf = MFU_STRDUP(optarg); mfu_pred_add(pred_head, MFU_PRED_SIZE, (void *)buf); break; case 'n': mfu_pred_add(pred_head, MFU_PRED_NAME, MFU_STRDUP(optarg)); break; case 'P': mfu_pred_add(pred_head, MFU_PRED_PATH, MFU_STRDUP(optarg)); break; case 'r': r = (regex_t*) MFU_MALLOC(sizeof(regex_t)); ret = regcomp(r, optarg, 0); if (ret) { MFU_ABORT(-1, "Could not compile regex: `%s' rc=%d\n", optarg, ret); } mfu_pred_add(pred_head, MFU_PRED_REGEX, (void*)r); break; case 'a': tr = mfu_pred_relative(optarg, now_t); mfu_pred_add(pred_head, MFU_PRED_AMIN, (void *)tr); break; case 'm': tr = mfu_pred_relative(optarg, now_t); mfu_pred_add(pred_head, MFU_PRED_MMIN, (void *)tr); break; case 'c': tr = mfu_pred_relative(optarg, now_t); mfu_pred_add(pred_head, MFU_PRED_CMIN, (void *)tr); break; case 'A': tr = mfu_pred_relative(optarg, now_t); mfu_pred_add(pred_head, MFU_PRED_ATIME, (void *)tr); break; case 'M': tr = mfu_pred_relative(optarg, now_t); mfu_pred_add(pred_head, MFU_PRED_MTIME, (void *)tr); break; case 'C': tr = mfu_pred_relative(optarg, now_t); mfu_pred_add(pred_head, MFU_PRED_CTIME, (void *)tr); break; case 'B': t = get_mtimes(optarg); if (t == NULL) { if (rank == 0) { printf("%s: can't find file %s\n", argv[0], optarg); } exit(1); } mfu_pred_add(pred_head, MFU_PRED_ANEWER, (void *)t); break; case 'N': t = get_mtimes(optarg); if (t == NULL) { if (rank == 0) { printf("%s: can't find file %s\n", argv[0], optarg); } exit(1); } mfu_pred_add(pred_head, MFU_PRED_MNEWER, (void *)t); break; case 'D': t = get_mtimes(optarg); if (t == NULL) { if (rank == 0) { printf("%s: can't find file %s\n", argv[0], optarg); } exit(1); } mfu_pred_add(pred_head, MFU_PRED_CNEWER, (void *)t); break; case 'p': mfu_pred_add(pred_head, MFU_PRED_PRINT, NULL); break; case 't': ret = add_type(pred_head, *optarg); if (ret != 1) { if (rank == 0) { printf("%s: unsupported file type %s\n", argv[0], optarg); } exit(1); } break; case 'i': inputname = MFU_STRDUP(optarg); break; case 'o': outputname = MFU_STRDUP(optarg); break; case 'v': mfu_debug_level = MFU_LOG_VERBOSE; break; case 'q': mfu_debug_level = MFU_LOG_NONE; break; case 'h': usage = 1; break; case '?': usage = 1; break; default: if (rank == 0) { printf("?? getopt returned character code 0%o ??\n", c); } } } pred_commit(pred_head); /* paths to walk come after the options */ int numpaths = 0; mfu_param_path* paths = NULL; if (optind < argc) { /* got a path to walk */ walk = 1; /* determine number of paths specified by user */ numpaths = argc - optind; /* allocate space for each path */ paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path)); /* process each path */ char** p = &argv[optind]; mfu_param_path_set_all((uint64_t)numpaths, (const char**)p, paths); optind += numpaths; /* don't allow user to specify input file with walk */ if (inputname != NULL) { usage = 1; } } else { /* if we're not walking, we must be reading, * and for that we need a file */ if (inputname == NULL) { usage = 1; } } if (usage) { if (rank == 0) { print_usage(); } mfu_finalize(); MPI_Finalize(); return 0; } /* create an empty file list */ mfu_flist flist = mfu_flist_new(); if (walk) { /* walk list of input paths */ mfu_flist_walk_param_paths(numpaths, paths, walk_opts, flist); } else { /* read data from cache file */ mfu_flist_read_cache(inputname, flist); } /* apply predicates to each item in list */ mfu_flist flist2 = mfu_flist_filter_pred(flist, pred_head); /* write data to cache file */ if (outputname != NULL) { if (!text) { mfu_flist_write_cache(outputname, flist2); } else { mfu_flist_write_text(outputname, flist2); } } /* free off the filtered list */ mfu_flist_free(&flist2); /* free users, groups, and files objects */ mfu_flist_free(&flist); /* free predicate list */ mfu_pred_free(&pred_head); /* free memory allocated for options */ mfu_free(&outputname); mfu_free(&inputname); /* free the path parameters */ mfu_param_path_free_all(numpaths, paths); /* free memory allocated to hold params */ mfu_free(&paths); /* free structure holding current time */ mfu_free(&now_t); /* free the walk options */ mfu_walk_opts_delete(&walk_opts); /* shut down MPI */ mfu_finalize(); MPI_Finalize(); return 0; }
int main(int argc, char* argv[]) { MPI_Init(&argc, &argv); mfu_init(); /* get our rank and number of ranks in the job */ int rank, ranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* pointer to mfu_walk_opts */ mfu_walk_opts_t* walk_opts = mfu_walk_opts_new(); uint64_t idx; int option_index = 0; int usage = 0; int report = 0; unsigned int numpaths = 0; mfu_param_path* paths = NULL; unsigned long long bytes; /* verbose by default */ mfu_debug_level = MFU_LOG_VERBOSE; /* default to 1MB stripe size, stripe across all OSTs, and all files are candidates */ int stripes = -1; uint64_t stripe_size = 1048576; uint64_t min_size = 0; static struct option long_options[] = { {"count", 1, 0, 'c'}, {"size", 1, 0, 's'}, {"minsize", 1, 0, 'm'}, {"report", 0, 0, 'r'}, {"progress", 1, 0, 'P'}, {"verbose", 0, 0, 'v'}, {"quiet", 0, 0, 'q'}, {"help", 0, 0, 'h'}, {0, 0, 0, 0} }; while (1) { int c = getopt_long(argc, argv, "c:s:m:rvqh", long_options, &option_index); if (c == -1) { break; } switch (c) { case 'c': /* stripe count */ stripes = atoi(optarg); break; case 's': /* stripe size in bytes */ if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Failed to parse stripe size: %s", optarg); } MPI_Abort(MPI_COMM_WORLD, 1); } stripe_size = (uint64_t)bytes; break; case 'm': /* min file size in bytes */ if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Failed to parse minimum file size: %s", optarg); } MPI_Abort(MPI_COMM_WORLD, 1); } min_size = (uint64_t)bytes; break; case 'r': /* report striping info */ report = 1; break; case 'P': mfu_progress_timeout = atoi(optarg); break; case 'v': mfu_debug_level = MFU_LOG_VERBOSE; break; case 'q': mfu_debug_level = MFU_LOG_NONE; break; case 'h': /* display usage */ usage = 1; break; case '?': /* display usage */ usage = 1; break; default: if (rank == 0) { printf("?? getopt returned character code 0%o ??\n", c); } } } /* check that we got a valid progress value */ if (mfu_progress_timeout < 0) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Seconds in --progress must be non-negative: %d invalid", mfu_progress_timeout); } usage = 1; } /* paths to walk come after the options */ if (optind < argc) { /* determine number of paths specified by user */ numpaths = argc - optind; /* allocate space for each path */ paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path)); /* process each path */ char** p = &argv[optind]; mfu_param_path_set_all((uint64_t)numpaths, (const char**)p, paths); optind += numpaths; } else { usage = 1; } /* if we need to print usage, print it and exit */ if (usage) { if (rank == 0) { print_usage(); } mfu_finalize(); MPI_Finalize(); return 1; } /* nothing to do if lustre support is disabled */ #ifndef LUSTRE_SUPPORT if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Lustre support is disabled."); } MPI_Abort(MPI_COMM_WORLD, 1); #endif /* stripe count must be -1 for all available or greater than 0 */ if (stripes < -1) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Stripe count must be -1 for all servers, 0 for lustre file system default, or a positive value"); } MPI_Abort(MPI_COMM_WORLD, 1); } /* lustre requires stripe sizes to be aligned */ if (stripe_size > 0 && stripe_size % 65536 != 0) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Stripe size must be a multiple of 65536"); } MPI_Abort(MPI_COMM_WORLD, 1); } /* TODO: verify that source / target are on Lustre */ /* walk list of input paths and stat as we walk */ mfu_flist flist = mfu_flist_new(); mfu_flist_walk_param_paths(numpaths, paths, walk_opts, flist); /* filter down our list to files which don't meet our striping requirements */ mfu_flist filtered = filter_list(flist, stripes, stripe_size, min_size, &create_prog_count_total, &stripe_prog_bytes_total); mfu_flist_free(&flist); MPI_Barrier(MPI_COMM_WORLD); /* report the file size and stripe count of all files we found */ if (report) { /* report the files in our filtered list */ stripe_info_report(filtered); /* free the paths and our list */ mfu_flist_free(&filtered); mfu_param_path_free_all(numpaths, paths); mfu_free(&paths); /* finalize */ mfu_finalize(); MPI_Finalize(); return 0; } /* generate a global suffix for our temp files and have each node check it's list */ char suffix[8]; uint64_t retry; /* seed our random number generator */ srand(time(NULL)); /* keep trying to make a valid random suffix...*/ do { uint64_t attempt = 0; /* make rank 0 responsible for generating a random suffix */ if (rank == 0) { generate_suffix(suffix, sizeof(suffix)); } /* broadcast the random suffix to all ranks */ MPI_Bcast(suffix, sizeof(suffix), MPI_CHAR, 0, MPI_COMM_WORLD); /* check that the file doesn't already exist */ uint64_t size = mfu_flist_size(filtered); for (idx = 0; idx < size; idx++) { char temp_path[PATH_MAX]; strcpy(temp_path, mfu_flist_file_get_name(filtered, idx)); strcat(temp_path, suffix); if(!mfu_access(temp_path, F_OK)) { /* the file already exists */ attempt = 1; break; } } /* do a reduce to figure out if a rank has a file collision */ MPI_Allreduce(&attempt, &retry, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD); } while(retry != 0); /* initialize progress messages while creating files */ create_prog_count = 0; create_prog = mfu_progress_start(mfu_progress_timeout, 1, MPI_COMM_WORLD, create_progress_fn); /* create new files so we can restripe */ uint64_t size = mfu_flist_size(filtered); for (idx = 0; idx < size; idx++) { char temp_path[PATH_MAX]; strcpy(temp_path, mfu_flist_file_get_name(filtered, idx)); strcat(temp_path, suffix); /* create a striped file at the temp file path */ mfu_stripe_set(temp_path, stripe_size, stripes); /* update our status for file create progress */ create_prog_count++; mfu_progress_update(&create_prog_count, create_prog); } /* finalize file create progress messages */ mfu_progress_complete(&create_prog_count, &create_prog); MPI_Barrier(MPI_COMM_WORLD); /* initialize progress messages while copying data */ stripe_prog_bytes = 0; stripe_prog = mfu_progress_start(mfu_progress_timeout, 1, MPI_COMM_WORLD, stripe_progress_fn); /* found a suffix, now we need to break our files into chunks based on stripe size */ mfu_file_chunk* file_chunks = mfu_file_chunk_list_alloc(filtered, stripe_size); mfu_file_chunk* p = file_chunks; while (p != NULL) { /* build path to temp file */ char temp_path[PATH_MAX]; strcpy(temp_path, p->name); strcat(temp_path, suffix); /* write each chunk in our list */ write_file_chunk(p, temp_path); /* move on to next file chunk */ p = p->next; } mfu_file_chunk_list_free(&file_chunks); /* finalize progress messages */ mfu_progress_complete(&stripe_prog_bytes, &stripe_prog); MPI_Barrier(MPI_COMM_WORLD); /* remove input file and rename temp file */ for (idx = 0; idx < size; idx++) { /* build path to temp file */ const char *in_path = mfu_flist_file_get_name(filtered, idx); char out_path[PATH_MAX]; strcpy(out_path, in_path); strcat(out_path, suffix); /* change the mode of the newly restriped file to be the same as the old one */ mode_t mode = (mode_t) mfu_flist_file_get_mode(filtered, idx); if (mfu_chmod(out_path, mode) != 0) { MFU_LOG(MFU_LOG_ERR, "Failed to chmod file %s (%s)", out_path, strerror(errno)); MPI_Abort(MPI_COMM_WORLD, 1); } /* rename the new, restriped file to the old name */ if (rename(out_path, in_path) != 0) { MFU_LOG(MFU_LOG_ERR, "Failed to rename file %s to %s", out_path, in_path); MPI_Abort(MPI_COMM_WORLD, 1); } } /* wait for everyone to finish */ MPI_Barrier(MPI_COMM_WORLD); /* free the walk options */ mfu_walk_opts_delete(&walk_opts); /* free filtered list, path parameters */ mfu_flist_free(&filtered); mfu_param_path_free_all(numpaths, paths); mfu_free(&paths); mfu_finalize(); MPI_Finalize(); return 0; }
int main(int argc, char** argv) { int i; /* initialize MPI */ MPI_Init(&argc, &argv); mfu_init(); /* get our rank and the size of comm_world */ int rank, ranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* pointer to mfu_walk_opts */ mfu_walk_opts_t* walk_opts = mfu_walk_opts_new(); /* TODO: extend options * - allow user to cache scan result in file * - allow user to load cached scan as input * * - allow user to filter by user, group, or filename using keyword or regex * - allow user to specify time window * - allow user to specify file sizes * * - allow user to sort by different fields * - allow user to group output (sum all bytes, group by user) */ char* inputname = NULL; char* outputname = NULL; char* sortfields = NULL; char* distribution = NULL; int file_histogram = 0; int walk = 0; int print = 0; int text = 0; struct distribute_option option; /* verbose by default */ mfu_debug_level = MFU_LOG_VERBOSE; int option_index = 0; static struct option long_options[] = { {"input", 1, 0, 'i'}, {"output", 1, 0, 'o'}, {"text", 0, 0, 't'}, {"lite", 0, 0, 'l'}, {"sort", 1, 0, 's'}, {"distribution", 1, 0, 'd'}, {"file_histogram", 0, 0, 'f'}, {"print", 0, 0, 'p'}, {"verbose", 0, 0, 'v'}, {"quiet", 0, 0, 'q'}, {"help", 0, 0, 'h'}, {0, 0, 0, 0} }; int usage = 0; while (1) { int c = getopt_long( argc, argv, "i:o:tls:d:fpvqh", long_options, &option_index ); if (c == -1) { break; } switch (c) { case 'i': inputname = MFU_STRDUP(optarg); break; case 'o': outputname = MFU_STRDUP(optarg); break; case 'l': /* don't stat each file on the walk */ walk_opts->use_stat = 0; break; case 's': sortfields = MFU_STRDUP(optarg); break; case 'd': distribution = MFU_STRDUP(optarg); break; case 'f': file_histogram = 1; break; case 'p': print = 1; break; case 'v': mfu_debug_level = MFU_LOG_VERBOSE; break; case 'q': mfu_debug_level = 0; break; case 't': text = 1; break; case 'h': usage = 1; break; case '?': usage = 1; break; default: if (rank == 0) { printf("?? getopt returned character code 0%o ??\n", c); } } } /* paths to walk come after the options */ int numpaths = 0; mfu_param_path* paths = NULL; if (optind < argc) { /* got a path to walk */ walk = 1; /* determine number of paths specified by user */ numpaths = argc - optind; /* allocate space for each path */ paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path)); /* process each path */ char** p = &argv[optind]; mfu_param_path_set_all((uint64_t)numpaths, (const char**)p, paths); optind += numpaths; /* don't allow user to specify input file with walk */ if (inputname != NULL) { usage = 1; } } else { /* if we're not walking, we must be reading, * and for that we need a file */ if (inputname == NULL) { usage = 1; } } /* if user is trying to sort, verify the sort fields are valid */ if (sortfields != NULL) { int maxfields; int nfields = 0; char* sortfields_copy = MFU_STRDUP(sortfields); if (walk_opts->use_stat) { maxfields = 7; char* token = strtok(sortfields_copy, ","); while (token != NULL) { if (strcmp(token, "name") != 0 && strcmp(token, "-name") != 0 && strcmp(token, "user") != 0 && strcmp(token, "-user") != 0 && strcmp(token, "group") != 0 && strcmp(token, "-group") != 0 && strcmp(token, "uid") != 0 && strcmp(token, "-uid") != 0 && strcmp(token, "gid") != 0 && strcmp(token, "-gid") != 0 && strcmp(token, "atime") != 0 && strcmp(token, "-atime") != 0 && strcmp(token, "mtime") != 0 && strcmp(token, "-mtime") != 0 && strcmp(token, "ctime") != 0 && strcmp(token, "-ctime") != 0 && strcmp(token, "size") != 0 && strcmp(token, "-size") != 0) { /* invalid token */ if (rank == 0) { printf("Invalid sort field: %s\n", token); } usage = 1; } nfields++; token = strtok(NULL, ","); } } else { maxfields = 1; char* token = strtok(sortfields_copy, ","); while (token != NULL) { if (strcmp(token, "name") != 0 && strcmp(token, "-name") != 0) { /* invalid token */ if (rank == 0) { printf("Invalid sort field: %s\n", token); } usage = 1; } nfields++; token = strtok(NULL, ","); } } if (nfields > maxfields) { if (rank == 0) { printf("Exceeded maximum number of sort fields: %d\n", maxfields); } usage = 1; } mfu_free(&sortfields_copy); } if (distribution != NULL) { if (distribution_parse(&option, distribution) != 0) { if (rank == 0) { printf("Invalid distribution argument: %s\n", distribution); } usage = 1; } else if (rank == 0 && option.separator_number != 0) { printf("Separators: "); for (i = 0; i < option.separator_number; i++) { if (i != 0) { printf(", "); } printf("%"PRIu64, option.separators[i]); } printf("\n"); } } if (usage) { if (rank == 0) { print_usage(); } MPI_Finalize(); return 0; } /* TODO: check stat fields fit within MPI types */ // if (sizeof(st_uid) > uint64_t) error(); etc... /* create an empty file list with default values */ mfu_flist flist = mfu_flist_new(); if (walk) { /* walk list of input paths */ mfu_flist_walk_param_paths(numpaths, paths, walk_opts, flist); } else { /* read data from cache file */ mfu_flist_read_cache(inputname, flist); } /* TODO: filter files */ //filter_files(&flist); /* sort files */ if (sortfields != NULL) { /* TODO: don't sort unless all_count > 0 */ mfu_flist_sort(sortfields, &flist); } /* print details for individual files */ if (print) { mfu_flist_print(flist); } /* print summary statistics of flist */ mfu_flist_print_summary(flist); /* print distribution if user specified this option */ if (distribution != NULL || file_histogram) { print_flist_distribution(file_histogram, &option, &flist, rank); } /* write data to cache file */ if (outputname != NULL) { if (!text) { mfu_flist_write_cache(outputname, flist); } else { mfu_flist_write_text(outputname, flist); } } /* free users, groups, and files objects */ mfu_flist_free(&flist); /* free memory allocated for options */ mfu_free(&distribution); mfu_free(&sortfields); mfu_free(&outputname); mfu_free(&inputname); /* free the path parameters */ mfu_param_path_free_all(numpaths, paths); /* free memory allocated to hold params */ mfu_free(&paths); /* free the walk options */ mfu_walk_opts_delete(&walk_opts); /* shut down MPI */ mfu_finalize(); MPI_Finalize(); return 0; }
int main(int argc, char** argv) { uint64_t i; int status; uint64_t file_size; uint64_t chunk_size = DDUP_CHUNK_SIZE; SHA256_CTX* ctx_ptr; MPI_Init(NULL, NULL); mfu_init(); int rank, ranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* pointer to mfu_walk_opts */ mfu_walk_opts_t* walk_opts = mfu_walk_opts_new(); mfu_debug_level = MFU_LOG_VERBOSE; static struct option long_options[] = { {"debug", 0, 0, 'd'}, {"verbose", 0, 0, 'v'}, {"quiet", 0, 0, 'q'}, {"help", 0, 0, 'h'}, {0, 0, 0, 0} }; /* Parse options */ int usage = 0; int help = 0; int c; int option_index = 0; while ((c = getopt_long(argc, argv, "d:vqh", \ long_options, &option_index)) != -1) { switch (c) { case 'd': if (strncmp(optarg, "fatal", 5) == 0) { mfu_debug_level = MFU_LOG_FATAL; if (rank == 0) MFU_LOG(MFU_LOG_INFO, "Debug level set to: fatal"); } else if (strncmp(optarg, "err", 3) == 0) { mfu_debug_level = MFU_LOG_ERR; if (rank == 0) MFU_LOG(MFU_LOG_INFO, "Debug level set to: " "errors"); } else if (strncmp(optarg, "warn", 4) == 0) { mfu_debug_level = MFU_LOG_WARN; if (rank == 0) MFU_LOG(MFU_LOG_INFO, "Debug level set to: " "warnings"); } else if (strncmp(optarg, "info", 4) == 0) { mfu_debug_level = MFU_LOG_INFO; if (rank == 0) MFU_LOG(MFU_LOG_INFO, "Debug level set to: info"); } else if (strncmp(optarg, "dbg", 3) == 0) { mfu_debug_level = MFU_LOG_DBG; if (rank == 0) MFU_LOG(MFU_LOG_INFO, "Debug level set to: debug"); } else { if (rank == 0) MFU_LOG(MFU_LOG_INFO, "Debug level `%s' not " "recognized. Defaulting to " "`info'.", optarg); } case 'h': usage = 1; help = 1; case 'v': mfu_debug_level = MFU_LOG_VERBOSE; break; case 'q': mfu_debug_level = MFU_LOG_NONE; break; case '?': usage = 1; help = 1; break; default: usage = 1; break; } } /* check that user gave us one and only one directory */ int numargs = argc - optind; if (numargs != 1) { /* missing the directory, so post a message, and print usage */ if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "You must specify a directory path"); } usage = 1; } /* print usage and bail if needed */ if (usage) { if (rank == 0) { print_usage(); } /* set error code base on whether user requested usage or not */ if (help) { status = 0; } else { status = -1; } MPI_Barrier(MPI_COMM_WORLD); goto out; } /* get the directory name */ const char* dir = argv[optind]; /* create MPI datatypes */ MPI_Datatype key; MPI_Datatype keysat; mpi_type_init(&key, &keysat); /* create DTCMP comparison operation */ DTCMP_Op cmp; mtcmp_cmp_init(&cmp); /* allocate buffer to read data from file */ char* chunk_buf = (char*)MFU_MALLOC(DDUP_CHUNK_SIZE); /* allocate a file list */ mfu_flist flist = mfu_flist_new(); /* Walk the path(s) to build the flist */ mfu_flist_walk_path(dir, walk_opts, flist); /* TODO: spread list among procs? */ /* get local number of items in flist */ uint64_t checking_files = mfu_flist_size(flist); /* allocate memory to hold SHA256 context values */ struct file_item* file_items = (struct file_item*) MFU_MALLOC(checking_files * sizeof(*file_items)); /* Allocate two lists of length size, where each * element has (DDUP_KEY_SIZE + 1) uint64_t values * (id, checksum, index) */ size_t list_bytes = checking_files * (DDUP_KEY_SIZE + 1) * sizeof(uint64_t); uint64_t* list = (uint64_t*) MFU_MALLOC(list_bytes); uint64_t* new_list = (uint64_t*) MFU_MALLOC(list_bytes); /* Initialize the list */ uint64_t* ptr = list; uint64_t new_checking_files = 0; for (i = 0; i < checking_files; i++) { /* check that item is a regular file */ mode_t mode = (mode_t) mfu_flist_file_get_mode(flist, i); if (! S_ISREG(mode)) { continue; } /* get the file size */ file_size = mfu_flist_file_get_size(flist, i); if (file_size == 0) { /* Files with size zero are not interesting at all */ continue; } /* for first pass, group all files with same file size */ ptr[0] = file_size; /* we'll leave the middle part of the key unset */ /* record our index in flist */ ptr[DDUP_KEY_SIZE] = i; /* initialize the SHA256 hash state for this file */ SHA256_Init(&file_items[i].ctx); /* increment our file count */ new_checking_files++; /* advance to next spot in the list */ ptr += DDUP_KEY_SIZE + 1; } /* reduce our list count based on any files filtered out above */ checking_files = new_checking_files; /* allocate arrays to hold result from DTCMP_Rankv call to * assign group and rank values to each item */ uint64_t output_bytes = checking_files * sizeof(uint64_t); uint64_t* group_id = (uint64_t*) MFU_MALLOC(output_bytes); uint64_t* group_ranks = (uint64_t*) MFU_MALLOC(output_bytes); uint64_t* group_rank = (uint64_t*) MFU_MALLOC(output_bytes); /* get total number of items across all tasks */ uint64_t sum_checking_files; MPI_Allreduce(&checking_files, &sum_checking_files, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); uint64_t chunk_id = 0; while (sum_checking_files > 1) { /* update the chunk id we'll read from all files */ chunk_id++; /* iterate over our list and compute SHA256 value for each */ ptr = list; for (i = 0; i < checking_files; i++) { /* get the flist index for this item */ uint64_t idx = ptr[DDUP_KEY_SIZE]; /* look up file name */ const char* fname = mfu_flist_file_get_name(flist, idx); /* look up file size */ file_size = mfu_flist_file_get_size(flist, idx); /* read a chunk of data from the file into chunk_buf */ uint64_t data_size; status = read_data(fname, chunk_buf, chunk_id, chunk_size, file_size, &data_size); if (status) { /* File size has been changed, TODO: handle */ printf("failed to read file %s, maybe file " "size has been modified during the " "process", fname); } /* update the SHA256 context for this file */ ctx_ptr = &file_items[idx].ctx; SHA256_Update(ctx_ptr, chunk_buf, data_size); /* * Use SHA256 value as key. * This is actually an hack, but SHA256_Final can't * be called multiple times with out changing ctx */ SHA256_CTX ctx_tmp; memcpy(&ctx_tmp, ctx_ptr, sizeof(ctx_tmp)); SHA256_Final((unsigned char*)(ptr + 1), &ctx_tmp); /* move on to next file in the list */ ptr += DDUP_KEY_SIZE + 1; } /* Assign group ids and compute group sizes */ uint64_t groups; DTCMP_Rankv( (int)checking_files, list, &groups, group_id, group_ranks, group_rank, key, keysat, cmp, DTCMP_FLAG_NONE, MPI_COMM_WORLD ); /* any files assigned to a group of size 1 is unique, * any files in groups sizes > 1 for which we've read * all bytes are the same, and filter all other files * into a new list for another iteration */ new_checking_files = 0; ptr = list; uint64_t* new_ptr = new_list; for (i = 0; i < checking_files; i++) { /* Get index into flist for this item */ uint64_t idx = ptr[DDUP_KEY_SIZE]; /* look up file name */ const char* fname = mfu_flist_file_get_name(flist, idx); /* look up file size */ file_size = mfu_flist_file_get_size(flist, idx); /* get a pointer to the SHA256 context for this file */ ctx_ptr = &file_items[idx].ctx; if (group_ranks[i] == 1) { /* * Only one file in this group, * mfu_flist_file_name(flist, idx) is unique */ } else if (file_size <= (chunk_id * chunk_size)) { /* * We've run out of bytes to checksum, and we * still have a group size > 1 * mfu_flist_file_name(flist, idx) is a * duplicate with other files that also have * matching group_id[i] */ unsigned char digest[SHA256_DIGEST_LENGTH]; SHA256_Final(digest, ctx_ptr); char digest_string[SHA256_DIGEST_LENGTH * 2 + 1]; dump_sha256_digest(digest_string, digest); printf("%s %s\n", fname, digest_string); } else { /* Have multiple files with the same checksum, * but still have bytes left to read, so keep * this file */ /* use new group ID to segregate files, * this id will be unique for all files of the * same size and having the same hash up to * this point */ new_ptr[0] = group_id[i]; /* Copy over flist index into new list entry */ new_ptr[DDUP_KEY_SIZE] = idx; /* got one more in the new list */ new_checking_files++; /* move on to next item in new list */ new_ptr += DDUP_KEY_SIZE + 1; MFU_LOG(MFU_LOG_DBG, "checking file " "\"%s\" for chunk index %d of size %" PRIu64"\n", fname, (int)chunk_id, chunk_size); } /* move on to next file in the list */ ptr += DDUP_KEY_SIZE + 1; } /* Swap lists */ uint64_t* tmp_list; tmp_list = list; list = new_list; new_list = tmp_list; /* Update size of current list */ checking_files = new_checking_files; /* Get new global list size */ MPI_Allreduce(&checking_files, &sum_checking_files, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); } /* free the walk options */ mfu_walk_opts_delete(&walk_opts); mfu_free(&group_rank); mfu_free(&group_ranks); mfu_free(&group_id); mfu_free(&new_list); mfu_free(&list); mfu_free(&file_items); mfu_free(&chunk_buf); mfu_flist_free(&flist); mtcmp_cmp_fini(&cmp); mpi_type_fini(&key, &keysat); status = 0; out: mfu_finalize(); MPI_Finalize(); return status; }