コード例 #1
0
ファイル: common.c プロジェクト: hpc/fileutils
/*
 * Shut down the mfu library and MPI, then terminate the process.
 * Called globally by all procs to exit.
 *
 * code: status value handed to exit().
 *
 * This function does not return.
 */
void DCOPY_exit(int code)
{
    /* TODO(review): open question left by the original author --
     * should CIRCLE_finalize be called here, or will this hang? */
    mfu_finalize();
    MPI_Finalize();
    exit(code);
}
コード例 #2
0
ファイル: dchmod.c プロジェクト: hpc/fileutils
/*
 * dchmod entry point.
 *
 * Parses the command line, builds a file list either by walking the
 * paths given after the options or by reading a cache file named with
 * --input, optionally filters that list with a regex, and then applies
 * the requested owner / group / mode changes with mfu_flist_chmod().
 *
 * Returns 0 on success and 1 when the arguments are invalid (after
 * rank 0 has printed the usage text).
 */
int main(int argc, char** argv)
{
    /* initialize MPI */
    MPI_Init(&argc, &argv);
    mfu_init();

    /* get our rank and the size of comm_world */
    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* pointer to mfu_walk_opts */
    mfu_walk_opts_t* walk_opts = mfu_walk_opts_new();

    /* parse command line options */
    char* inputname = NULL;
    char* ownername = NULL;
    char* groupname = NULL;
    char* modestr   = NULL;
    char* regex_exp = NULL;
    mfu_perms* head = NULL;
    int walk        = 0;
    int exclude     = 0;
    int name        = 0;

    /* verbose by default */
    mfu_debug_level = MFU_LOG_VERBOSE;

    int option_index = 0;
    static struct option long_options[] = {
        {"input",    1, 0, 'i'},
        {"owner",    1, 0, 'u'},
        {"group",    1, 0, 'g'},
        {"mode",     1, 0, 'm'},
        {"exclude",  1, 0, 'e'},
        {"match",    1, 0, 'a'},
        {"name",     0, 0, 'n'},
        {"progress", 1, 0, 'P'},
        {"verbose",  0, 0, 'v'},
        {"quiet",    0, 0, 'q'},
        {"help",     0, 0, 'h'},
        {0, 0, 0, 0}
    };

    int usage = 0;
    while (1) {
        int c = getopt_long(
                    argc, argv, "i:u:g:m:nvqh",
                    long_options, &option_index
                );

        if (c == -1) {
            break;
        }

        /* each string option releases the copy made by an earlier
         * occurrence of the same option, so the last one wins
         * without leaking the previous value */
        switch (c) {
            case 'i':
                mfu_free(&inputname);
                inputname = MFU_STRDUP(optarg);
                break;
            case 'u':
                mfu_free(&ownername);
                ownername = MFU_STRDUP(optarg);
                break;
            case 'g':
                mfu_free(&groupname);
                groupname = MFU_STRDUP(optarg);
                break;
            case 'm':
                mfu_free(&modestr);
                modestr = MFU_STRDUP(optarg);
                break;
            case 'e':
                /* --exclude and --match share one pattern slot */
                mfu_free(&regex_exp);
                regex_exp = MFU_STRDUP(optarg);
                exclude = 1;
                break;
            case 'a':
                mfu_free(&regex_exp);
                regex_exp = MFU_STRDUP(optarg);
                exclude = 0;
                break;
            case 'n':
                name = 1;
                break;
            case 'P':
                mfu_progress_timeout = atoi(optarg);
                break;
            case 'v':
                mfu_debug_level = MFU_LOG_VERBOSE;
                break;
            case 'q':
                mfu_debug_level = MFU_LOG_NONE;
                break;
            case 'h':
                usage = 1;
                break;
            case '?':
                usage = 1;
                break;
            default:
                if (rank == 0) {
                    printf("?? getopt returned character code 0%o ??\n", c);
                }
                break;
        }
    }

    /* check that we got a valid progress value */
    if (mfu_progress_timeout < 0) {
        if (rank == 0) {
            MFU_LOG(MFU_LOG_ERR, "Seconds in --progress must be non-negative: %d invalid", mfu_progress_timeout);
        }
        usage = 1;
    }

    /* paths to walk come after the options */
    int numpaths = 0;
    mfu_param_path* paths = NULL;
    if (optind < argc) {
        /* got a path to walk */
        walk = 1;

        /* determine number of paths specified by user */
        numpaths = argc - optind;

        /* allocate space for each path */
        paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path));

        /* process each path */
        const char** argpaths = (const char**)(&argv[optind]);
        mfu_param_path_set_all(numpaths, argpaths, paths);

        /* advance to next set of options */
        optind += numpaths;

        /* don't allow input file and walk */
        if (inputname != NULL) {
            usage = 1;
        }
    }
    else {
        /* if we're not walking, we must be reading,
         * and for that we need a file */
        if (inputname == NULL) {
            usage = 1;
        }
    }

    /* check that our mode string parses correctly */
    if (modestr != NULL) {
        int valid = mfu_perms_parse(modestr, &head);
        if (! valid) {
            /* the partially built list is released by the usage
             * path below, which frees head exactly once */
            usage = 1;
            if (rank == 0) {
                printf("invalid mode string: %s\n", modestr);
            }
        }
    }

    /* print usage if we need to */
    if (usage) {
        if (rank == 0) {
            print_usage();
        }

        /* release everything allocated during option parsing so the
         * error exit does not leak */
        mfu_param_path_free_all(numpaths, paths);
        mfu_free(&paths);
        mfu_free(&ownername);
        mfu_free(&groupname);
        mfu_free(&modestr);
        mfu_free(&regex_exp);
        mfu_perms_free(&head);
        mfu_free(&inputname);
        mfu_walk_opts_delete(&walk_opts);

        mfu_finalize();
        MPI_Finalize();
        return 1;
    }

    /* create an empty file list */
    mfu_flist flist = mfu_flist_new();

    /* flag used to check if permissions need to be
     * set on the walk */
    if (head != NULL) {
        mfu_perms_need_dir_rx(head, walk_opts);
    }

    /* get our list of files, either by walking or reading an
     * input file */
    if (walk) {
        /* an octal mode with no owner or group change clears
         * use_stat in the walk options */
        if (head != NULL && head->octal && ownername == NULL && groupname == NULL) {
            walk_opts->use_stat = 0;
        }
        /* walk list of input paths */
        mfu_flist_walk_param_paths(numpaths, paths, walk_opts, flist);
    }
    else {
        /* read list from file */
        mfu_flist_read_cache(inputname, flist);
    }

    /* assume we'll use the full list */
    mfu_flist srclist = flist;

    /* filter the list if needed */
    mfu_flist filtered_flist = MFU_FLIST_NULL;
    if (regex_exp != NULL) {
        /* filter the list based on regex */
        filtered_flist = mfu_flist_filter_regex(flist, regex_exp, exclude, name);

        /* update our source list to use the filtered list instead of the original */
        srclist = filtered_flist;
    }

    /* change group and permissions */
    mfu_flist_chmod(srclist, ownername, groupname, head);

    /* free list if it was used */
    if (filtered_flist != MFU_FLIST_NULL){
        /* free the filtered flist (if any) */
        mfu_flist_free(&filtered_flist);
    }

    /* free the file list */
    mfu_flist_free(&flist);

    /* free the path parameters */
    mfu_param_path_free_all(numpaths, paths);

    /* free memory allocated to hold params */
    mfu_free(&paths);

    /* free the owner and group names */
    mfu_free(&ownername);
    mfu_free(&groupname);

    /* free the modestr */
    mfu_free(&modestr);

    /* free the match_pattern if it isn't null */
    if (regex_exp != NULL) {
        mfu_free(&regex_exp);
    }

    /* free the head of the list */
    mfu_perms_free(&head);

    /* free the input file name */
    mfu_free(&inputname);

    /* free the walk options */
    mfu_walk_opts_delete(&walk_opts);

    /* shut down MPI */
    mfu_finalize();
    MPI_Finalize();

    return 0;
}
コード例 #3
0
/*
 * Entry point of a parallel remove tool.
 *
 * Parses the command line, builds a file list either by walking the
 * paths given after the options or by reading a cache file named with
 * --input, optionally filters the list with a regex, and then either
 * prints the list (--dryrun) or unlinks every item in it.
 *
 * walk_stat and dir_perm are not declared in this function; they are
 * expected to be defined elsewhere in the file.
 *
 * Returns 0 on success and 1 when the arguments are invalid (after
 * rank 0 has printed the usage text).
 */
int main(int argc, char** argv)
{
    /* initialize MPI */
    MPI_Init(&argc, &argv);
    mfu_init();

    /* get our rank and the size of comm_world */
    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* parse command line options */
    char* inputname = NULL;
    char* regex_exp = NULL;
    int walk        = 0;
    int exclude     = 0;
    int name        = 0;
    int dryrun      = 0;

    int option_index = 0;
    static struct option long_options[] = {
        {"input",    1, 0, 'i'},
        {"lite",     0, 0, 'l'},
        {"exclude",  1, 0, 'e'},
        {"match",    1, 0, 'a'},
        {"name",     0, 0, 'n'},
        {"help",     0, 0, 'h'},
        {"dryrun",   0, 0, 'd'},
        {"verbose",  0, 0, 'v'},
        {0, 0, 0, 0}
    };

    int usage = 0;
    while (1) {
        int c = getopt_long(
                    argc, argv, "i:nlhv",
                    long_options, &option_index
                );

        if (c == -1) {
            break;
        }

        /* string options release the copy made by an earlier
         * occurrence, so the last one wins without leaking */
        switch (c) {
            case 'i':
                mfu_free(&inputname);
                inputname = MFU_STRDUP(optarg);
                break;
            case 'l':
                walk_stat = 0;
                break;
            case 'e':
                /* --exclude and --match share one pattern slot */
                mfu_free(&regex_exp);
                regex_exp = MFU_STRDUP(optarg);
                exclude = 1;
                break;
            case 'a':
                mfu_free(&regex_exp);
                regex_exp = MFU_STRDUP(optarg);
                exclude = 0;
                break;
            case 'n':
                name = 1;
                break;
            case 'h':
                usage = 1;
                break;
            case 'd':
                dryrun = 1;
                break;
            case 'v':
                mfu_debug_level = MFU_LOG_VERBOSE;
                break;
            case '?':
                usage = 1;
                break;
            default:
                if (rank == 0) {
                    printf("?? getopt returned character code 0%o ??\n", c);
                }
                break;
        }
    }

    /* paths to walk come after the options */
    int numpaths = 0;
    mfu_param_path* paths = NULL;
    if (optind < argc) {
        /* got a path to walk */
        walk = 1;

        /* determine number of paths specified by user */
        numpaths = argc - optind;

        /* allocate space for each path */
        paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path));

        /* process each path; char** does not convert implicitly to
         * const char**, so cast explicitly */
        const char** argpaths = (const char**)(&argv[optind]);
        mfu_param_path_set_all(numpaths, argpaths, paths);

        /* advance to next set of options */
        optind += numpaths;

        /* don't allow input file and walk */
        if (inputname != NULL) {
            usage = 1;
        }
    }
    else {
        /* if we're not walking, we must be reading,
         * and for that we need a file */
        if (inputname == NULL) {
            usage = 1;
        }
    }

    /* print usage if we need to */
    if (usage) {
        if (rank == 0) {
            print_usage();
        }

        /* release everything allocated during option parsing so the
         * error exit does not leak */
        mfu_param_path_free_all(numpaths, paths);
        mfu_free(&paths);
        mfu_free(&regex_exp);
        mfu_free(&inputname);

        mfu_finalize();
        MPI_Finalize();
        return 1;
    }

    /* create an empty file list */
    mfu_flist flist = mfu_flist_new();

    /* get our list of files, either by walking or reading an
     * input file */
    if (walk) {
        /* walk list of input paths */
        mfu_param_path_walk(numpaths, paths, walk_stat, flist, dir_perm);
    }
    else {
        /* read list from file */
        mfu_flist_read_cache(inputname, flist);
    }

    /* assume we'll use the full list */
    mfu_flist srclist = flist;

    /* filter the list if needed */
    mfu_flist filtered_flist = MFU_FLIST_NULL;
    if (regex_exp != NULL) {
        /* filter the list based on regex */
        filtered_flist = mfu_flist_filter_regex(flist, regex_exp, exclude, name);

        /* update our source list to use the filtered list instead of the original */
        srclist = filtered_flist;
    }

    /* only actually delete files if the user wasn't doing a dry run */
    if (dryrun) {
        /* just print what we would delete without actually doing anything,
         * this is useful if the user is trying to get a regex right */
        mfu_flist_print(srclist);
    } else {
        /* remove files */
        mfu_flist_unlink(srclist);
    }

    /* free list if it was used */
    if (filtered_flist != MFU_FLIST_NULL){
        /* free the filtered flist (if any) */
        mfu_flist_free(&filtered_flist);
    }

    /* free the file list */
    mfu_flist_free(&flist);

    /* free the path parameters */
    mfu_param_path_free_all(numpaths, paths);

    /* free memory allocated to hold params */
    mfu_free(&paths);

    /* free the regex string if we have one */
    mfu_free(&regex_exp);

    /* free the input file name */
    mfu_free(&inputname);

    /* shut down MPI */
    mfu_finalize();
    MPI_Finalize();

    return 0;
}
コード例 #4
0
ファイル: dfind.c プロジェクト: hpc/fileutils
/*
 * dfind entry point.
 *
 * Parses the command line into a list of predicates, builds a file
 * list either by walking the paths given after the options or by
 * reading a cache file named with --input, filters the list through
 * the predicates, and writes the result to --output when one is given.
 *
 * Returns 0 after normal completion and also after printing usage;
 * several option errors terminate the process directly with exit(1).
 */
int main (int argc, char** argv)
{
    /* initialize MPI */
    MPI_Init(&argc, &argv);
    mfu_init();

    /* get our rank and the size of comm_world */
    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* pointer to mfu_walk_opts */
    mfu_walk_opts_t* walk_opts = mfu_walk_opts_new();

    /* capture current time for any time based queries,
     * to get a consistent value, capture and bcast from rank 0 */
    mfu_pred_times* now_t = mfu_pred_now();

    mfu_pred* pred_head = mfu_pred_new();
    char* inputname  = NULL;
    char* outputname = NULL;
    int walk = 0;

    /* NOTE: no option in this function ever sets text, so the cache
     * format is always used when writing --output */
    int text = 0;

    static struct option long_options[] = {
        {"input",     1, 0, 'i'},
        {"output",    1, 0, 'o'},
        {"verbose",   0, 0, 'v'},
        {"quiet",     0, 0, 'q'},
        {"help",      0, 0, 'h'},

        { "maxdepth", required_argument, NULL, 'd' },

        { "amin",     required_argument, NULL, 'a' },
        { "anewer",   required_argument, NULL, 'B' },
        { "atime",    required_argument, NULL, 'A' },
        { "cmin",     required_argument, NULL, 'c' },
        { "cnewer",   required_argument, NULL, 'D' },
        { "ctime",    required_argument, NULL, 'C' },
        { "mmin",     required_argument, NULL, 'm' },
        { "newer",    required_argument, NULL, 'N' },
        { "mtime",    required_argument, NULL, 'M' },

        { "gid",      required_argument, NULL, 'g' },
        { "group",    required_argument, NULL, 'G' },
        { "uid",      required_argument, NULL, 'u' },
        { "user",     required_argument, NULL, 'U' },

        { "name",     required_argument, NULL, 'n' },
        { "path",     required_argument, NULL, 'P' },
        { "regex",    required_argument, NULL, 'r' },
        { "size",     required_argument, NULL, 's' },
        { "type",     required_argument, NULL, 't' },

        { "print",    no_argument,       NULL, 'p' },
        { "exec",     required_argument, NULL, 'e' },
        { NULL, 0, NULL, 0 },
    };

    options.maxdepth = INT_MAX;

    /* verbose by default; set once before parsing so that a later
     * option cannot silently undo an earlier --quiet */
    mfu_debug_level = MFU_LOG_VERBOSE;

    int usage = 0;
    while (1) {
        int c = getopt_long(
                    argc, argv, "i:o:vqh",
                    long_options, NULL
                );

        if (c == -1) {
            break;
        }

        int i;
        char* buf;
        mfu_pred_times* t;
        mfu_pred_times_rel* tr;
        regex_t* r;
        int ret;

        switch (c) {
        case 'e': {
            /* gather every word from the option argument up to the
             * terminating ";" into one space-separated command string */
            size_t space = 1024 * 1024;
            size_t used  = 0;
            buf = (char*) MFU_MALLOC(space);
            buf[0] = '\0';

            for (i = optind - 1; ; i++) {
                /* check the index before touching argv[i]: argv[argc]
                 * is NULL and must not be handed to strcmp */
                if (i >= argc) {
                    if (rank == 0) {
                        printf("%s: exec missing terminating ';'\n", argv[0]);
                    }
                    exit(1);
                }

                if (strcmp(";", argv[i]) == 0) {
                    break;
                }

                /* need room for the word, a separating space and the
                 * terminating NUL */
                size_t len = strlen(argv[i]);
                if (len + 2 > space - used) {
                    if (rank == 0) {
                        printf("%s: exec argument list too long.\n", argv[0]);
                    }
                    /* the buffer cannot be used past this point, so
                     * treat this like the other fatal option errors */
                    mfu_free(&buf);
                    exit(1);
                }

                memcpy(buf + used, argv[i], len);
                used += len;
                buf[used++] = ' ';
                buf[used] = '\0';

                /* this word belongs to --exec, skip it in getopt */
                optind++;
            }

            /* clobber the trailing space */
            if (used > 0) {
                buf[used - 1] = '\0';
            }
            mfu_pred_add(pred_head, MFU_PRED_EXEC, buf);
            break;
        }

        case 'd':
            options.maxdepth = atoi(optarg);
            break;

        case 'g':
            /* TODO: error check argument */
            buf = MFU_STRDUP(optarg);
            mfu_pred_add(pred_head, MFU_PRED_GID, (void *)buf);
            break;

        case 'G':
            buf = MFU_STRDUP(optarg);
            mfu_pred_add(pred_head, MFU_PRED_GROUP, (void *)buf);
            break;

        case 'u':
            /* TODO: error check argument */
            buf = MFU_STRDUP(optarg);
            mfu_pred_add(pred_head, MFU_PRED_UID, (void *)buf);
            break;

        case 'U':
            buf = MFU_STRDUP(optarg);
            mfu_pred_add(pred_head, MFU_PRED_USER, (void *)buf);
            break;

        case 's':
            buf = MFU_STRDUP(optarg);
            mfu_pred_add(pred_head, MFU_PRED_SIZE, (void *)buf);
            break;

        case 'n':
            mfu_pred_add(pred_head, MFU_PRED_NAME, MFU_STRDUP(optarg));
            break;
        case 'P':
            mfu_pred_add(pred_head, MFU_PRED_PATH, MFU_STRDUP(optarg));
            break;
        case 'r':
            r = (regex_t*) MFU_MALLOC(sizeof(regex_t));
            ret = regcomp(r, optarg, 0);
            if (ret) {
                MFU_ABORT(-1, "Could not compile regex: `%s' rc=%d\n", optarg, ret);
            }
            mfu_pred_add(pred_head, MFU_PRED_REGEX, (void*)r);
            break;

        case 'a':
            tr = mfu_pred_relative(optarg, now_t);
            mfu_pred_add(pred_head, MFU_PRED_AMIN, (void *)tr);
            break;
        case 'm':
            tr = mfu_pred_relative(optarg, now_t);
            mfu_pred_add(pred_head, MFU_PRED_MMIN, (void *)tr);
            break;
        case 'c':
            tr = mfu_pred_relative(optarg, now_t);
            mfu_pred_add(pred_head, MFU_PRED_CMIN, (void *)tr);
            break;

        case 'A':
            tr = mfu_pred_relative(optarg, now_t);
            mfu_pred_add(pred_head, MFU_PRED_ATIME, (void *)tr);
            break;
        case 'M':
            tr = mfu_pred_relative(optarg, now_t);
            mfu_pred_add(pred_head, MFU_PRED_MTIME, (void *)tr);
            break;
        case 'C':
            tr = mfu_pred_relative(optarg, now_t);
            mfu_pred_add(pred_head, MFU_PRED_CTIME, (void *)tr);
            break;

        case 'B':
            t = get_mtimes(optarg);
            if (t == NULL) {
                if (rank == 0) {
                    printf("%s: can't find file %s\n", argv[0], optarg);
                }
                exit(1);
            }
            mfu_pred_add(pred_head, MFU_PRED_ANEWER, (void *)t);
            break;
        case 'N':
            t = get_mtimes(optarg);
            if (t == NULL) {
                if (rank == 0) {
                    printf("%s: can't find file %s\n", argv[0], optarg);
                }
                exit(1);
            }
            mfu_pred_add(pred_head, MFU_PRED_MNEWER, (void *)t);
            break;
        case 'D':
            t = get_mtimes(optarg);
            if (t == NULL) {
                if (rank == 0) {
                    printf("%s: can't find file %s\n", argv[0], optarg);
                }
                exit(1);
            }
            mfu_pred_add(pred_head, MFU_PRED_CNEWER, (void *)t);
            break;

        case 'p':
            mfu_pred_add(pred_head, MFU_PRED_PRINT, NULL);
            break;

        case 't':
            /* only the first character of the argument selects the type */
            ret = add_type(pred_head, *optarg);
            if (ret != 1) {
                if (rank == 0) {
                    printf("%s: unsupported file type %s\n", argv[0], optarg);
                }
                exit(1);
            }
            break;

        case 'i':
            inputname = MFU_STRDUP(optarg);
            break;
        case 'o':
            outputname = MFU_STRDUP(optarg);
            break;
        case 'v':
            mfu_debug_level = MFU_LOG_VERBOSE;
            break;
        case 'q':
            mfu_debug_level = MFU_LOG_NONE;
            break;
        case 'h':
            usage = 1;
            break;
        case '?':
            usage = 1;
            break;
        default:
            if (rank == 0) {
                printf("?? getopt returned character code 0%o ??\n", c);
            }
            break;
        }
    }

    pred_commit(pred_head);

    /* paths to walk come after the options */
    int numpaths = 0;
    mfu_param_path* paths = NULL;
    if (optind < argc) {
        /* got a path to walk */
        walk = 1;

        /* determine number of paths specified by user */
        numpaths = argc - optind;

        /* allocate space for each path */
        paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path));

        /* process each path */
        char** p = &argv[optind];
        mfu_param_path_set_all((uint64_t)numpaths, (const char**)p, paths);
        optind += numpaths;

        /* don't allow user to specify input file with walk */
        if (inputname != NULL) {
            usage = 1;
        }
    }
    else {
        /* if we're not walking, we must be reading,
         * and for that we need a file */
        if (inputname == NULL) {
            usage = 1;
        }
    }

    if (usage) {
        if (rank == 0) {
            print_usage();
        }

        /* release everything allocated during option parsing so the
         * usage exit does not leak */
        mfu_pred_free(&pred_head);
        mfu_free(&outputname);
        mfu_free(&inputname);
        mfu_param_path_free_all(numpaths, paths);
        mfu_free(&paths);
        mfu_free(&now_t);
        mfu_walk_opts_delete(&walk_opts);

        mfu_finalize();
        MPI_Finalize();
        return 0;
    }


    /* create an empty file list */
    mfu_flist flist = mfu_flist_new();

    if (walk) {
        /* walk list of input paths */
        mfu_flist_walk_param_paths(numpaths, paths, walk_opts, flist);
    }
    else {
        /* read data from cache file */
        mfu_flist_read_cache(inputname, flist);
    }

    /* apply predicates to each item in list */
    mfu_flist flist2 = mfu_flist_filter_pred(flist, pred_head);

    /* write data to cache file */
    if (outputname != NULL) {
        if (!text) {
            mfu_flist_write_cache(outputname, flist2);
        } else {
            mfu_flist_write_text(outputname, flist2);
        }
    }

    /* free off the filtered list */
    mfu_flist_free(&flist2);

    /* free users, groups, and files objects */
    mfu_flist_free(&flist);

    /* free predicate list */
    mfu_pred_free(&pred_head);

    /* free memory allocated for options */
    mfu_free(&outputname);
    mfu_free(&inputname);

    /* free the path parameters */
    mfu_param_path_free_all(numpaths, paths);

    /* free memory allocated to hold params */
    mfu_free(&paths);

    /* free structure holding current time */
    mfu_free(&now_t);

    /* free the walk options */
    mfu_walk_opts_delete(&walk_opts);

    /* shut down MPI */
    mfu_finalize();
    MPI_Finalize();

    return 0;
}
コード例 #5
0
ファイル: dstripe.c プロジェクト: hpc/fileutils
/*
 * Write "<name><suffix>" into out, which holds outsize bytes.
 * Logs an error and aborts the MPI job if the combined name does not
 * fit, rather than writing past the end of the buffer.
 */
static void dstripe_temp_path(char* out, size_t outsize, const char* name, const char* suffix)
{
    int n = snprintf(out, outsize, "%s%s", name, suffix);
    if (n < 0 || (size_t)n >= outsize) {
        MFU_LOG(MFU_LOG_ERR, "Temporary file name is too long for %s", name);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
}

/*
 * dstripe entry point.
 *
 * Parses the command line, walks the given paths, and filters the
 * resulting list with filter_list().  With --report the filtered list
 * is only reported.  Otherwise each file in the filtered list is
 * rewritten through a temporary file "<name><suffix>" created with the
 * requested stripe settings: the temp files are created, the data is
 * written chunk by chunk, the original mode is applied, and the temp
 * file is renamed over the original.
 *
 * Returns 0 on success and 1 when the arguments are invalid; several
 * error conditions abort the whole job with MPI_Abort.
 */
int main(int argc, char* argv[])
{
    MPI_Init(&argc, &argv);
    mfu_init();

    /* get our rank and number of ranks in the job */
    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* pointer to mfu_walk_opts */
    mfu_walk_opts_t* walk_opts = mfu_walk_opts_new();

    uint64_t idx;
    int option_index = 0;
    int usage = 0;
    int report = 0;
    unsigned int numpaths = 0;
    mfu_param_path* paths = NULL;
    unsigned long long bytes;

    /* verbose by default */
    mfu_debug_level = MFU_LOG_VERBOSE;

    /* default to 1MB stripe size, stripe across all OSTs, and all files are candidates */
    int stripes = -1;
    uint64_t stripe_size = 1048576;
    uint64_t min_size = 0;

    static struct option long_options[] = {
        {"count",    1, 0, 'c'},
        {"size",     1, 0, 's'},
        {"minsize",  1, 0, 'm'},
        {"report",   0, 0, 'r'},
        {"progress", 1, 0, 'P'},
        {"verbose",  0, 0, 'v'},
        {"quiet",    0, 0, 'q'},
        {"help",     0, 0, 'h'},
        {0, 0, 0, 0}
    };

    while (1) {
        int c = getopt_long(argc, argv, "c:s:m:rvqh",
                    long_options, &option_index);

        if (c == -1) {
            break;
        }

        switch (c) {
            case 'c':
                /* stripe count */
                stripes = atoi(optarg);
                break;
            case 's':
                /* stripe size in bytes */
                if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS) {
                    if (rank == 0) {
                        MFU_LOG(MFU_LOG_ERR, "Failed to parse stripe size: %s", optarg);
                    }
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
                stripe_size = (uint64_t)bytes;
                break;
            case 'm':
                /* min file size in bytes */
                if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS) {
                    if (rank == 0) {
                        MFU_LOG(MFU_LOG_ERR, "Failed to parse minimum file size: %s", optarg);
                    }
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
                min_size = (uint64_t)bytes;
                break;
            case 'r':
                /* report striping info */
                report = 1;
                break;
            case 'P':
                mfu_progress_timeout = atoi(optarg);
                break;
            case 'v':
                mfu_debug_level = MFU_LOG_VERBOSE;
                break;
            case 'q':
                mfu_debug_level = MFU_LOG_NONE;
                break;
            case 'h':
                /* display usage */
                usage = 1;
                break;
            case '?':
                /* display usage */
                usage = 1;
                break;
            default:
                if (rank == 0) {
                    printf("?? getopt returned character code 0%o ??\n", c);
                }
                break;
        }
    }

    /* check that we got a valid progress value */
    if (mfu_progress_timeout < 0) {
        if (rank == 0) {
            MFU_LOG(MFU_LOG_ERR, "Seconds in --progress must be non-negative: %d invalid", mfu_progress_timeout);
        }
        usage = 1;
    }

    /* paths to walk come after the options */
    if (optind < argc) {
        /* determine number of paths specified by user */
        numpaths = argc - optind;

        /* allocate space for each path */
        paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path));

        /* process each path */
        char** p = &argv[optind];
        mfu_param_path_set_all((uint64_t)numpaths, (const char**)p, paths);
        optind += numpaths;
    } else {
        usage = 1;
    }

    /* if we need to print usage, print it and exit */
    if (usage) {
        if (rank == 0) {
            print_usage();
        }

        /* release what option parsing allocated before leaving */
        if (paths != NULL) {
            mfu_param_path_free_all(numpaths, paths);
            mfu_free(&paths);
        }
        mfu_walk_opts_delete(&walk_opts);

        mfu_finalize();
        MPI_Finalize();
        return 1;
    }

    /* nothing to do if lustre support is disabled */
#ifndef LUSTRE_SUPPORT
    if (rank == 0) {
        MFU_LOG(MFU_LOG_ERR, "Lustre support is disabled.");
    }
    MPI_Abort(MPI_COMM_WORLD, 1);
#endif

    /* stripe count must be -1 for all available, 0 for the file
     * system default, or greater than 0 */
    if (stripes < -1) {
        if (rank == 0) {
            MFU_LOG(MFU_LOG_ERR, "Stripe count must be -1 for all servers, 0 for lustre file system default, or a positive value");
        }
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* lustre requires stripe sizes to be aligned */
    if (stripe_size > 0 && stripe_size % 65536 != 0) {
        if (rank == 0) {
            MFU_LOG(MFU_LOG_ERR, "Stripe size must be a multiple of 65536");
        }
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* TODO: verify that source / target are on Lustre */

    /* walk list of input paths and stat as we walk */
    mfu_flist flist = mfu_flist_new();
    mfu_flist_walk_param_paths(numpaths, paths, walk_opts, flist);

    /* filter down our list to files which don't meet our striping requirements */
    mfu_flist filtered = filter_list(flist, stripes, stripe_size, min_size, &create_prog_count_total, &stripe_prog_bytes_total);
    mfu_flist_free(&flist);

    MPI_Barrier(MPI_COMM_WORLD);

    /* report the file size and stripe count of all files we found */
    if (report) {
        /* report the files in our filtered list */
        stripe_info_report(filtered);

        /* free the walk options, the paths and our list */
        mfu_walk_opts_delete(&walk_opts);
        mfu_flist_free(&filtered);
        mfu_param_path_free_all(numpaths, paths);
        mfu_free(&paths);

        /* finalize */
        mfu_finalize();
        MPI_Finalize();
        return 0;
    }

    /* generate a global suffix for our temp files and have each node check its list */
    char suffix[8];
    uint64_t retry;

    /* seed our random number generator */
    srand(time(NULL));

    /* keep trying to make a valid random suffix...*/
    do {
        uint64_t attempt = 0;

        /* make rank 0 responsible for generating a random suffix */
        if (rank == 0) {
            generate_suffix(suffix, sizeof(suffix));
        }

        /* broadcast the random suffix to all ranks */
        MPI_Bcast(suffix, sizeof(suffix), MPI_CHAR, 0, MPI_COMM_WORLD);

        /* check that the file doesn't already exist */
        uint64_t size = mfu_flist_size(filtered);
        for (idx = 0; idx < size; idx++) {
            char temp_path[PATH_MAX];
            dstripe_temp_path(temp_path, sizeof(temp_path), mfu_flist_file_get_name(filtered, idx), suffix);
            if(!mfu_access(temp_path, F_OK)) {
                /* the file already exists */
                attempt = 1;
                break;
            }
        }

        /* do a reduce to figure out if a rank has a file collision */
        MPI_Allreduce(&attempt, &retry, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD);
    } while(retry != 0);

    /* initialize progress messages while creating files */
    create_prog_count = 0;
    create_prog = mfu_progress_start(mfu_progress_timeout, 1, MPI_COMM_WORLD, create_progress_fn);

    /* create new files so we can restripe */
    uint64_t size = mfu_flist_size(filtered);
    for (idx = 0; idx < size; idx++) {
        char temp_path[PATH_MAX];
        dstripe_temp_path(temp_path, sizeof(temp_path), mfu_flist_file_get_name(filtered, idx), suffix);

        /* create a striped file at the temp file path */
        mfu_stripe_set(temp_path, stripe_size, stripes);

        /* update our status for file create progress */
        create_prog_count++;
        mfu_progress_update(&create_prog_count, create_prog);
    }

    /* finalize file create progress messages */
    mfu_progress_complete(&create_prog_count, &create_prog);

    MPI_Barrier(MPI_COMM_WORLD);

    /* initialize progress messages while copying data */
    stripe_prog_bytes = 0;
    stripe_prog = mfu_progress_start(mfu_progress_timeout, 1, MPI_COMM_WORLD, stripe_progress_fn);

    /* found a suffix, now we need to break our files into chunks based on stripe size */
    mfu_file_chunk* file_chunks = mfu_file_chunk_list_alloc(filtered, stripe_size);
    mfu_file_chunk* p = file_chunks;
    while (p != NULL) {
        /* build path to temp file */
        char temp_path[PATH_MAX];
        dstripe_temp_path(temp_path, sizeof(temp_path), p->name, suffix);

        /* write each chunk in our list */
        write_file_chunk(p, temp_path);

        /* move on to next file chunk */
        p = p->next;
    }
    mfu_file_chunk_list_free(&file_chunks);

    /* finalize progress messages */
    mfu_progress_complete(&stripe_prog_bytes, &stripe_prog);

    MPI_Barrier(MPI_COMM_WORLD);

    /* remove input file and rename temp file */
    for (idx = 0; idx < size; idx++) {
        /* build path to temp file */
        const char *in_path = mfu_flist_file_get_name(filtered, idx);
        char out_path[PATH_MAX];
        dstripe_temp_path(out_path, sizeof(out_path), in_path, suffix);

        /* change the mode of the newly restriped file to be the same as the old one */
        mode_t mode = (mode_t) mfu_flist_file_get_mode(filtered, idx);
        if (mfu_chmod(out_path, mode) != 0) {
            MFU_LOG(MFU_LOG_ERR, "Failed to chmod file %s (%s)", out_path, strerror(errno));
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* rename the new, restriped file to the old name */
        if (rename(out_path, in_path) != 0) {
            MFU_LOG(MFU_LOG_ERR, "Failed to rename file %s to %s", out_path, in_path);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
    }

    /* wait for everyone to finish */
    MPI_Barrier(MPI_COMM_WORLD);

    /* free the walk options */
    mfu_walk_opts_delete(&walk_opts);

    /* free filtered list, path parameters */
    mfu_flist_free(&filtered);
    mfu_param_path_free_all(numpaths, paths);
    mfu_free(&paths);

    mfu_finalize();
    MPI_Finalize();

    return 0;
}
コード例 #6
0
/*
 * Shut down the mfu library and MPI, then terminate the process.
 *
 * code: status value handed to exit().
 *
 * This function does not return.
 */
void DTAR_exit(int code) {
    mfu_finalize();
    MPI_Finalize();
    exit(code);
}
コード例 #7
0
ファイル: dwalk.c プロジェクト: hpc/fileutils
int main(int argc, char** argv)
{
    int i;

    /* initialize MPI */
    MPI_Init(&argc, &argv);
    mfu_init();

    /* get our rank and the size of comm_world */
    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* pointer to mfu_walk_opts */
    mfu_walk_opts_t* walk_opts = mfu_walk_opts_new();

    /* TODO: extend options
     *   - allow user to cache scan result in file
     *   - allow user to load cached scan as input
     *
     *   - allow user to filter by user, group, or filename using keyword or regex
     *   - allow user to specify time window
     *   - allow user to specify file sizes
     *
     *   - allow user to sort by different fields
     *   - allow user to group output (sum all bytes, group by user) */

    char* inputname      = NULL;
    char* outputname     = NULL;
    char* sortfields     = NULL;
    char* distribution   = NULL;

    int file_histogram       = 0;
    int walk                 = 0;
    int print                = 0;
    int text                 = 0;

    struct distribute_option option;

    /* verbose by default */
    mfu_debug_level = MFU_LOG_VERBOSE;

    int option_index = 0;
    static struct option long_options[] = {
        {"input",          1, 0, 'i'},
        {"output",         1, 0, 'o'},
        {"text",           0, 0, 't'},
        {"lite",           0, 0, 'l'},
        {"sort",           1, 0, 's'},
        {"distribution",   1, 0, 'd'},
        {"file_histogram", 0, 0, 'f'},
        {"print",          0, 0, 'p'},
        {"verbose",        0, 0, 'v'},
        {"quiet",          0, 0, 'q'},
        {"help",           0, 0, 'h'},
        {0, 0, 0, 0}
    };

    int usage = 0;
    while (1) {
        int c = getopt_long(
                    argc, argv, "i:o:tls:d:fpvqh",
                    long_options, &option_index
                );

        if (c == -1) {
            break;
        }

        switch (c) {
            case 'i':
                inputname = MFU_STRDUP(optarg);
                break;
            case 'o':
                outputname = MFU_STRDUP(optarg);
                break;
            case 'l':
                /* don't stat each file on the walk */
                walk_opts->use_stat = 0;
                break;
            case 's':
                sortfields = MFU_STRDUP(optarg);
                break;
            case 'd':
                distribution = MFU_STRDUP(optarg);
                break;
            case 'f':
                file_histogram = 1;
                break;
            case 'p':
                print = 1;
                break;
            case 'v':
                mfu_debug_level = MFU_LOG_VERBOSE;
                break;
            case 'q':
                mfu_debug_level = 0;
                break;
            case 't':
                text = 1;
                break;
            case 'h':
                usage = 1;
                break;
            case '?':
                usage = 1;
                break;
            default:
                if (rank == 0) {
                    printf("?? getopt returned character code 0%o ??\n", c);
                }
        }
    }

    /* paths to walk come after the options */
    int numpaths = 0;
    mfu_param_path* paths = NULL;
    if (optind < argc) {
        /* got a path to walk */
        walk = 1;

        /* determine number of paths specified by user */
        numpaths = argc - optind;

        /* allocate space for each path */
        paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path));

        /* process each path */
        char** p = &argv[optind];
        mfu_param_path_set_all((uint64_t)numpaths, (const char**)p, paths);
        optind += numpaths;

        /* don't allow user to specify input file with walk */
        if (inputname != NULL) {
            usage = 1;
        }
    }
    else {
        /* if we're not walking, we must be reading,
         * and for that we need a file */
        if (inputname == NULL) {
            usage = 1;
        }
    }

    /* if user is trying to sort, verify the sort fields are valid */
    if (sortfields != NULL) {
        int maxfields;
        int nfields = 0;
        char* sortfields_copy = MFU_STRDUP(sortfields);
        if (walk_opts->use_stat) {
            maxfields = 7;
            char* token = strtok(sortfields_copy, ",");
            while (token != NULL) {
                if (strcmp(token,  "name")  != 0 &&
                        strcmp(token, "-name")  != 0 &&
                        strcmp(token,  "user")  != 0 &&
                        strcmp(token, "-user")  != 0 &&
                        strcmp(token,  "group") != 0 &&
                        strcmp(token, "-group") != 0 &&
                        strcmp(token,  "uid")   != 0 &&
                        strcmp(token, "-uid")   != 0 &&
                        strcmp(token,  "gid")   != 0 &&
                        strcmp(token, "-gid")   != 0 &&
                        strcmp(token,  "atime") != 0 &&
                        strcmp(token, "-atime") != 0 &&
                        strcmp(token,  "mtime") != 0 &&
                        strcmp(token, "-mtime") != 0 &&
                        strcmp(token,  "ctime") != 0 &&
                        strcmp(token, "-ctime") != 0 &&
                        strcmp(token,  "size")  != 0 &&
                        strcmp(token, "-size")  != 0) {
                    /* invalid token */
                    if (rank == 0) {
                        printf("Invalid sort field: %s\n", token);
                    }
                    usage = 1;
                }
                nfields++;
                token = strtok(NULL, ",");
            }
        }
        else {
            maxfields = 1;
            char* token = strtok(sortfields_copy, ",");
            while (token != NULL) {
                if (strcmp(token,  "name")  != 0 &&
                        strcmp(token, "-name")  != 0) {
                    /* invalid token */
                    if (rank == 0) {
                        printf("Invalid sort field: %s\n", token);
                    }
                    usage = 1;
                }
                nfields++;
                token = strtok(NULL, ",");
            }
        }
        if (nfields > maxfields) {
            if (rank == 0) {
                printf("Exceeded maximum number of sort fields: %d\n", maxfields);
            }
            usage = 1;
        }
        mfu_free(&sortfields_copy);
    }

    if (distribution != NULL) {
        if (distribution_parse(&option, distribution) != 0) {
            if (rank == 0) {
                printf("Invalid distribution argument: %s\n", distribution);
            }
            usage = 1;
        } else if (rank == 0 && option.separator_number != 0) {
            printf("Separators: ");
            for (i = 0; i < option.separator_number; i++) {
                if (i != 0) {
                    printf(", ");
                }
                printf("%"PRIu64, option.separators[i]);
            }
            printf("\n");
        }
    }

    if (usage) {
        if (rank == 0) {
            print_usage();
        }
        MPI_Finalize();
        return 0;
    }

    /* TODO: check stat fields fit within MPI types */
    // if (sizeof(st_uid) > uint64_t) error(); etc...

    /* create an empty file list with default values */
    mfu_flist flist = mfu_flist_new();

    if (walk) {
        /* walk list of input paths */
        mfu_flist_walk_param_paths(numpaths, paths, walk_opts, flist);
    }
    else {
        /* read data from cache file */
        mfu_flist_read_cache(inputname, flist);
    }

    /* TODO: filter files */
    //filter_files(&flist);

    /* sort files */
    if (sortfields != NULL) {
        /* TODO: don't sort unless all_count > 0 */
        mfu_flist_sort(sortfields, &flist);
    }

    /* print details for individual files */
    if (print) {
        mfu_flist_print(flist);
    }

    /* print summary statistics of flist */
    mfu_flist_print_summary(flist);

    /* print distribution if user specified this option */
    if (distribution != NULL || file_histogram) {
        print_flist_distribution(file_histogram, &option, &flist, rank);
    }

    /* write data to cache file */
    if (outputname != NULL) {
        if (!text) {
            mfu_flist_write_cache(outputname, flist);
        } else {
            mfu_flist_write_text(outputname, flist);
        }
    }

    /* free users, groups, and files objects */
    mfu_flist_free(&flist);

    /* free memory allocated for options */
    mfu_free(&distribution);
    mfu_free(&sortfields);
    mfu_free(&outputname);
    mfu_free(&inputname);

    /* free the path parameters */
    mfu_param_path_free_all(numpaths, paths);

    /* free memory allocated to hold params */
    mfu_free(&paths);

    /* free the walk options */
    mfu_walk_opts_delete(&walk_opts);

    /* shut down MPI */
    mfu_finalize();
    MPI_Finalize();

    return 0;
}
コード例 #8
0
ファイル: ddup.c プロジェクト: hpc/fileutils
/*
 * ddup entry point: report duplicate files under a single directory.
 *
 * After option parsing, the directory given as the one positional argument
 * is walked into an flist.  Regular, non-empty files are entered into a key
 * list, first keyed by file size.  Then, one chunk (DDUP_CHUNK_SIZE bytes)
 * at a time, each remaining file's running SHA256 state is updated and a
 * snapshot of the digest is used as the key; DTCMP_Rankv groups identical
 * keys across all ranks.  Files alone in their group are dropped as unique,
 * files that are fully read and still share a group are printed as
 * duplicates along with their digest, and the rest go around again.
 *
 * Returns 0 on success or when usage was requested via -h / triggered by
 * an unknown option, and -1 when usage is printed for another reason (for
 * example a missing directory argument).
 */
int main(int argc, char** argv)
{
    uint64_t i;
    int status;
    uint64_t file_size;

    uint64_t chunk_size = DDUP_CHUNK_SIZE;

    SHA256_CTX* ctx_ptr;

    MPI_Init(NULL, NULL);
    mfu_init();

    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* pointer to mfu_walk_opts */
    mfu_walk_opts_t* walk_opts = mfu_walk_opts_new();

    mfu_debug_level = MFU_LOG_VERBOSE;

    /* "debug" takes an argument, matching "d:" in the short option string;
     * otherwise --debug would leave optarg NULL for the strncmp calls below */
    static struct option long_options[] = {
        {"debug",    1, 0, 'd'},
        {"verbose",  0, 0, 'v'},
        {"quiet",    0, 0, 'q'},
        {"help",     0, 0, 'h'},
        {0, 0, 0, 0}
    };

    /* Parse options */
    int usage = 0;
    int help  = 0;
    int c;
    int option_index = 0;
    while ((c = getopt_long(argc, argv, "d:vqh",
                            long_options, &option_index)) != -1)
    {
        switch (c) {
        case 'd':
            if (strncmp(optarg, "fatal", 5) == 0) {
                mfu_debug_level = MFU_LOG_FATAL;

                if (rank == 0)
                    MFU_LOG(MFU_LOG_INFO,
                              "Debug level set to: fatal");
            }
            else if (strncmp(optarg, "err", 3) == 0) {
                mfu_debug_level = MFU_LOG_ERR;

                if (rank == 0)
                    MFU_LOG(MFU_LOG_INFO,
                              "Debug level set to: "
                              "errors");
            }
            else if (strncmp(optarg, "warn", 4) == 0) {
                mfu_debug_level = MFU_LOG_WARN;

                if (rank == 0)
                    MFU_LOG(MFU_LOG_INFO,
                              "Debug level set to: "
                              "warnings");
            }
            else if (strncmp(optarg, "info", 4) == 0) {
                mfu_debug_level = MFU_LOG_INFO;

                if (rank == 0)
                    MFU_LOG(MFU_LOG_INFO,
                              "Debug level set to: info");
            }
            else if (strncmp(optarg, "dbg", 3) == 0) {
                mfu_debug_level = MFU_LOG_DBG;

                if (rank == 0)
                    MFU_LOG(MFU_LOG_INFO,
                              "Debug level set to: debug");
            }
            else {
                /* actually apply the default that the message announces */
                mfu_debug_level = MFU_LOG_INFO;

                if (rank == 0)
                    MFU_LOG(MFU_LOG_INFO,
                              "Debug level `%s' not "
                              "recognized. Defaulting to "
                              "`info'.", optarg);
            }
            /* do not fall through into the help / verbose handling */
            break;
        case 'h':
            usage = 1;
            help  = 1;
            break;
        case 'v':
            mfu_debug_level = MFU_LOG_VERBOSE;
            break;
        case 'q':
            mfu_debug_level = MFU_LOG_NONE;
            break;
        case '?':
            usage = 1;
            help  = 1;
            break;
        default:
            usage = 1;
            break;
        }
    }

    /* check that user gave us one and only one directory */
    int numargs = argc - optind;
    if (numargs != 1) {
        /* missing the directory, so post a message, and print usage */
        if (rank == 0) {
            MFU_LOG(MFU_LOG_ERR, "You must specify a directory path");
        }
        usage = 1;
    }

    /* print usage and bail if needed */
    if (usage) {
        if (rank == 0) {
            print_usage();
        }
        /* set error code base on whether user requested usage or not */
        if (help) {
            status = 0;
        } else {
            status = -1;
        }
        MPI_Barrier(MPI_COMM_WORLD);
        goto out;
    }

    /* get the directory name */
    const char* dir = argv[optind];

    /* create MPI datatypes */
    MPI_Datatype key;
    MPI_Datatype keysat;
    mpi_type_init(&key, &keysat);

    /* create DTCMP comparison operation */
    DTCMP_Op cmp;
    mtcmp_cmp_init(&cmp);

    /* allocate buffer to read data from file */
    char* chunk_buf = (char*)MFU_MALLOC(DDUP_CHUNK_SIZE);

    /* allocate a file list */
    mfu_flist flist = mfu_flist_new();

    /* Walk the path(s) to build the flist */
    mfu_flist_walk_path(dir, walk_opts, flist);

    /* TODO: spread list among procs? */

    /* get local number of items in flist */
    uint64_t checking_files = mfu_flist_size(flist);

    /* allocate memory to hold SHA256 context values,
     * one slot per flist entry, indexed by flist index */
    struct file_item* file_items = (struct file_item*) MFU_MALLOC(checking_files * sizeof(*file_items));

    /* Allocate two lists of length size, where each
     * element has (DDUP_KEY_SIZE + 1) uint64_t values
     * (id, checksum, index)
     */
    size_t list_bytes = checking_files * (DDUP_KEY_SIZE + 1) * sizeof(uint64_t);
    uint64_t* list     = (uint64_t*) MFU_MALLOC(list_bytes);
    uint64_t* new_list = (uint64_t*) MFU_MALLOC(list_bytes);

    /* Initialize the list */
    uint64_t* ptr = list;
    uint64_t new_checking_files = 0;
    for (i = 0; i < checking_files; i++) {
        /* check that item is a regular file */
        mode_t mode = (mode_t) mfu_flist_file_get_mode(flist, i);
        if (! S_ISREG(mode)) {
            continue;
        }

        /* get the file size */
        file_size = mfu_flist_file_get_size(flist, i);
        if (file_size == 0) {
            /* Files with size zero are not interesting at all */
            continue;
        }

        /* for first pass, group all files with same file size */
        ptr[0] = file_size;

        /* we'll leave the middle part of the key unset */

        /* record our index in flist */
        ptr[DDUP_KEY_SIZE] = i;

        /* initialize the SHA256 hash state for this file */
        SHA256_Init(&file_items[i].ctx);

        /* increment our file count */
        new_checking_files++;

        /* advance to next spot in the list */
        ptr += DDUP_KEY_SIZE + 1;
    }

    /* reduce our list count based on any files filtered out above */
    checking_files = new_checking_files;

    /* allocate arrays to hold result from DTCMP_Rankv call to
     * assign group and rank values to each item */
    uint64_t output_bytes = checking_files * sizeof(uint64_t);
    uint64_t* group_id    = (uint64_t*) MFU_MALLOC(output_bytes);
    uint64_t* group_ranks = (uint64_t*) MFU_MALLOC(output_bytes);
    uint64_t* group_rank  = (uint64_t*) MFU_MALLOC(output_bytes);

    /* get total number of items across all tasks */
    uint64_t sum_checking_files;
    MPI_Allreduce(&checking_files, &sum_checking_files, 1,
                  MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);

    uint64_t chunk_id = 0;
    while (sum_checking_files > 1) {
        /* update the chunk id we'll read from all files */
        chunk_id++;

        /* iterate over our list and compute SHA256 value for each */
        ptr = list;
        for (i = 0; i < checking_files; i++) {
            /* get the flist index for this item */
            uint64_t idx = ptr[DDUP_KEY_SIZE];

            /* look up file name */
            const char* fname = mfu_flist_file_get_name(flist, idx);

            /* look up file size */
            file_size = mfu_flist_file_get_size(flist, idx);

            /* read a chunk of data from the file into chunk_buf;
             * data_size starts at 0 so that the hash update below never
             * uses an indeterminate length if read_data fails before
             * storing a value */
            uint64_t data_size = 0;
            status = read_data(fname, chunk_buf, chunk_id,
                               chunk_size, file_size, &data_size);
            if (status) {
                /* File size has been changed, TODO: handle */
                printf("failed to read file %s, maybe file "
                       "size has been modified during the "
                       "process\n", fname);
            }

            /* update the SHA256 context for this file */
            ctx_ptr = &file_items[idx].ctx;
            SHA256_Update(ctx_ptr, chunk_buf, data_size);

            /*
             * Use SHA256 value as key.
             * This is actually an hack, but SHA256_Final can't
             * be called multiple times with out changing ctx
             */
            SHA256_CTX ctx_tmp;
            memcpy(&ctx_tmp, ctx_ptr, sizeof(ctx_tmp));
            SHA256_Final((unsigned char*)(ptr + 1), &ctx_tmp);

            /* move on to next file in the list */
            ptr += DDUP_KEY_SIZE + 1;
        }

        /* Assign group ids and compute group sizes */
        uint64_t groups;
        DTCMP_Rankv(
            (int)checking_files, list,
            &groups, group_id, group_ranks, group_rank,
            key, keysat, cmp, DTCMP_FLAG_NONE, MPI_COMM_WORLD
        );

        /* any files assigned to a group of size 1 is unique,
         * any files in groups sizes > 1 for which we've read
         * all bytes are the same, and filter all other files
         * into a new list for another iteration */
        new_checking_files = 0;
        ptr = list;
        uint64_t* new_ptr = new_list;
        for (i = 0; i < checking_files; i++) {
            /* Get index into flist for this item */
            uint64_t idx = ptr[DDUP_KEY_SIZE];

            /* look up file name */
            const char* fname = mfu_flist_file_get_name(flist, idx);

            /* look up file size */
            file_size = mfu_flist_file_get_size(flist, idx);

            /* get a pointer to the SHA256 context for this file */
            ctx_ptr = &file_items[idx].ctx;

            if (group_ranks[i] == 1) {
                /*
                 * Only one file in this group,
                 * mfu_flist_file_name(flist, idx) is unique
                 */
            } else if (file_size <= (chunk_id * chunk_size)) {
                /*
                 * We've run out of bytes to checksum, and we
                 * still have a group size > 1
                 * mfu_flist_file_name(flist, idx) is a
                 * duplicate with other files that also have
                 * matching group_id[i]
                 */
                unsigned char digest[SHA256_DIGEST_LENGTH];
                SHA256_Final(digest, ctx_ptr);

                char digest_string[SHA256_DIGEST_LENGTH * 2 + 1];
                dump_sha256_digest(digest_string, digest);
                printf("%s %s\n", fname, digest_string);
            } else {
                /* Have multiple files with the same checksum,
                 * but still have bytes left to read, so keep
                 * this file
                 */

                /* use new group ID to segregate files,
                 * this id will be unique for all files of the
                 * same size and having the same hash up to
                 * this point */
                new_ptr[0] = group_id[i];

                /* Copy over flist index into new list entry */
                new_ptr[DDUP_KEY_SIZE] = idx;

                /* got one more in the new list */
                new_checking_files++;

                /* move on to next item in new list */
                new_ptr += DDUP_KEY_SIZE + 1;

                MFU_LOG(MFU_LOG_DBG, "checking file "
                          "\"%s\" for chunk index %d of size %"
                          PRIu64"\n", fname, (int)chunk_id,
                          chunk_size);
            }

            /* move on to next file in the list */
            ptr += DDUP_KEY_SIZE + 1;
        }

        /* Swap lists */
        uint64_t* tmp_list;
        tmp_list = list;
        list     = new_list;
        new_list = tmp_list;

        /* Update size of current list */
        checking_files = new_checking_files;

        /* Get new global list size */
        MPI_Allreduce(&checking_files, &sum_checking_files, 1,
                      MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
    }

    mfu_free(&group_rank);
    mfu_free(&group_ranks);
    mfu_free(&group_id);
    mfu_free(&new_list);
    mfu_free(&list);
    mfu_free(&file_items);
    mfu_free(&chunk_buf);
    mfu_flist_free(&flist);

    mtcmp_cmp_fini(&cmp);
    mpi_type_fini(&key, &keysat);

    status = 0;

out:
    /* free the walk options; done after the label because they are
     * allocated before option parsing and so exist on the usage path too */
    mfu_walk_opts_delete(&walk_opts);

    mfu_finalize();
    MPI_Finalize();

    return status;
}