예제 #1
0
/* for given depth, just remove the files we know about */
static void remove_direct(mfu_flist list, uint64_t* rmcount)
{
    /* each process directly removes its elements */
    uint64_t idx;
    uint64_t size = mfu_flist_size(list);
    for (idx = 0; idx < size; idx++) {
        /* get name and type of item */
        const char* name = mfu_flist_file_get_name(list, idx);
        mfu_filetype type = mfu_flist_file_get_type(list, idx);

        /* delete item */
        if (type == MFU_TYPE_DIR) {
            remove_type('d', name);
        }
        else if (type == MFU_TYPE_FILE || type == MFU_TYPE_LINK) {
            remove_type('f', name);
        }
        else {
            remove_type('u', name);
        }
    }

    /* report the number of items we deleted */
    *rmcount = size;

    return;
}
예제 #2
0
/* given an input list, split items into separate lists depending
 * on their depth, returns number of levels, minimum depth, and
 * array of lists as output */
void mfu_flist_array_by_depth(
    mfu_flist srclist,
    int* outlevels,
    int* outmin,
    mfu_flist** outlists)
{
    /* check that our pointers are valid */
    if (outlevels == NULL || outmin == NULL || outlists == NULL) {
        return;
    }

    /* initialize return values */
    *outlevels = 0;
    *outmin    = -1;
    *outlists  = NULL;

    /* get total file count */
    uint64_t total = mfu_flist_global_size(srclist);
    if (total == 0) {
        return;
    }

    /* get min and max depths, determine number of levels,
     * allocate array of lists */
    int min = mfu_flist_min_depth(srclist);
    int max = mfu_flist_max_depth(srclist);
    int levels = max - min + 1;
    mfu_flist* lists = (mfu_flist*) MFU_MALLOC((size_t)levels * sizeof(mfu_flist));

    /* create a list for each level */
    int i;
    for (i = 0; i < levels; i++) {
        lists[i] = mfu_flist_subset(srclist);
    }

    /* copy each item from source list to its corresponding level */
    uint64_t idx = 0;
    uint64_t size = mfu_flist_size(srclist);
    while (idx < size) {
        int depth = mfu_flist_file_get_depth(srclist, idx);
        int depth_index = depth - min;
        mfu_flist dstlist = lists[depth_index];
        mfu_flist_file_copy(srclist, idx, dstlist);
        idx++;
    }

    /* summarize each list */
    for (i = 0; i < levels; i++) {
        mfu_flist_summarize(lists[i]);
    }

    /* set return parameters */
    *outlevels = levels;
    *outmin    = min;
    *outlists  = lists;

    return;
}
예제 #3
0
파일: dfind.c 프로젝트: hpc/fileutils
/* apply predicate tests and actions to matching items in flist */
static void mfu_flist_pred(mfu_flist flist, mfu_pred* p)
{
    uint64_t idx;
    uint64_t size = mfu_flist_size(flist);
    for (idx = 0; idx < size; idx++) {
        mfu_pred_execute(flist, idx, p);
    }
    return;
}
예제 #4
0
파일: dstripe.c 프로젝트: hpc/fileutils
/* filter the list of files down based on the current stripe size and stripe count */
static mfu_flist filter_list(mfu_flist list, int stripe_count, uint64_t stripe_size, uint64_t min_size, uint64_t* total_count, uint64_t* total_size)
{
    /* initialize counters for file and byte count */
    uint64_t my_count = 0;
    uint64_t my_size  = 0;

    /* this is going to be a subset of the full file list */
    mfu_flist filtered = mfu_flist_subset(list);

    uint64_t idx;
    uint64_t size = mfu_flist_size(list);
    for (idx = 0; idx < size; idx++) {
        /* we only care about regular files */
        mfu_filetype type = mfu_flist_file_get_type(list, idx);
        if (type == MFU_TYPE_FILE) {
            /* if our file is below the minimum file size, skip it */
            uint64_t filesize = mfu_flist_file_get_size(list, idx);
            if (filesize < min_size) {
                continue;
            }

            const char* in_path = mfu_flist_file_get_name(list, idx);
            uint64_t curr_stripe_size = 0;
            uint64_t curr_stripe_count = 0;

            /*
             * attempt to get striping info,
             * skip the file if we can't get the striping info we seek
             */
            if (mfu_stripe_get(in_path, &curr_stripe_size, &curr_stripe_count) != 0) {
                continue;
            }

            /* TODO: this should probably be better */
            /* if the current stripe size or stripe count doesn't match, then a restripe the file */
            if (curr_stripe_count != stripe_count || curr_stripe_size != stripe_size) {
                mfu_flist_file_copy(list, idx, filtered);

                /* increment file count and add file size to our running total */
                my_count += 1;
                my_size  += filesize;
            }
        }
    }

    /* summarize and return the new list */
    mfu_flist_summarize(filtered);

    /* get sum of count and size */
    MPI_Allreduce(&my_count, total_count, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
    MPI_Allreduce(&my_size,  total_size,  1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);

    return filtered;
}
예제 #5
0
/* Given an input file list, stat each file and enqueue details
 * in output file list, skip entries excluded by skip function
 * and skip args */
void mfu_flist_stat(
  mfu_flist input_flist,
  mfu_flist flist,
  mfu_flist_skip_fn skip_fn,
  void *skip_args)
{
    flist_t* file_list = (flist_t*)flist;

    /* we will stat all items in output list, so set detail to 1 */
    file_list->detail = 1;

    /* get user data if needed */
    if (file_list->have_users == 0) {
        mfu_flist_usrgrp_get_users(flist);
    }

    /* get groups data if needed */
    if (file_list->have_groups == 0) {
        mfu_flist_usrgrp_get_groups(flist);
    }

    /* step through each item in input list and stat it */
    uint64_t idx;
    uint64_t size = mfu_flist_size(input_flist);
    for (idx = 0; idx < size; idx++) {
        /* get name of item */
        const char* name = mfu_flist_file_get_name(input_flist, idx);

        /* check whether we should skip this item */
        if (skip_fn != NULL && skip_fn(name, skip_args)) {
            /* skip this file, don't include it in new list */
            MFU_LOG(MFU_LOG_INFO, "skip %s");
            continue;
        }

        /* stat the item */
        struct stat st;
        int status = mfu_lstat(name, &st);
        if (status != 0) {
            MFU_LOG(MFU_LOG_ERR, "mfu_lstat() failed: `%s' rc=%d (errno=%d %s)", name, status, errno, strerror(errno));
            continue;
        }

        /* insert item into output list */
        mfu_flist_insert_stat(flist, name, st.st_mode, &st);
    }

    /* compute global summary */
    mfu_flist_summarize(flist);
}
예제 #6
0
파일: dstripe.c 프로젝트: hpc/fileutils
/* print to stdout the stripe size and count of each file in the mfu_flist */
static void stripe_info_report(mfu_flist list)
{
    uint64_t idx;
    uint64_t size = mfu_flist_size(list);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* print header */
    if (rank == 0) {
        printf("%10s %3.3s %8.8s %s\n", "Size", "Cnt", "Str Size", "File Path");
        printf("%10s %3.3s %8.8s %s\n", "----", "---", "--------", "---------");
        fflush(stdout);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* print out file info */
    for (idx = 0; idx < size; idx++) {
        mfu_filetype type = mfu_flist_file_get_type(list, idx);

        /* report striping information for regular files only */
        if (type == MFU_TYPE_FILE) {
            const char* in_path = mfu_flist_file_get_name(list, idx);
            uint64_t stripe_size = 0;
            uint64_t stripe_count = 0;
            char filesize[11];
            char stripesize[9];

            /*
             * attempt to get striping info and print it out,
             * skip the file if we can't get the striping info we seek
             */
            if (mfu_stripe_get(in_path, &stripe_size, &stripe_count) != 0) {
                continue;
            }

            /* format it nicely */
            generate_pretty_size(filesize, sizeof(filesize), mfu_flist_file_get_size(list, idx));
            generate_pretty_size(stripesize, sizeof(stripesize), stripe_size);

            /* print the row */
            printf("%10.10s %3" PRId64 " %8.8s %s\n", filesize, stripe_count, stripesize, in_path);
            fflush(stdout);
        }
    }
}
예제 #7
0
static void remove_create(CIRCLE_handle* handle)
{
    char path[CIRCLE_MAX_STRING_LEN];

    /* enqueues all items at rm_depth to be deleted */
    uint64_t idx;
    uint64_t size = mfu_flist_size(circle_list);
    for (idx = 0; idx < size; idx++) {
        /* get name and type of item */
        const char* name = mfu_flist_file_get_name(circle_list, idx);
        mfu_filetype type = mfu_flist_file_get_type(circle_list, idx);

        /* encode type */
        if (type == MFU_TYPE_DIR) {
            path[0] = 'd';
        }
        else if (type == MFU_TYPE_FILE || type == MFU_TYPE_LINK) {
            path[0] = 'f';
        }
        else {
            path[0] = 'u';
        }

        /* encode name */
        size_t len = strlen(name) + 2;
        if (len <= CIRCLE_MAX_STRING_LEN) {
            strcpy(&path[1], name);
            handle->enqueue(path);
        }
        else {
            MFU_LOG(MFU_LOG_ERR, "Filename longer than %lu",
                      (unsigned long)CIRCLE_MAX_STRING_LEN
                     );
        }
    }

    return;
}
예제 #8
0
파일: ddup.c 프로젝트: hpc/fileutils
int main(int argc, char** argv)
{
    uint64_t i;
    int status;
    uint64_t file_size;

    uint64_t chunk_size = DDUP_CHUNK_SIZE;

    SHA256_CTX* ctx_ptr;

    MPI_Init(NULL, NULL);
    mfu_init();

    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* pointer to mfu_walk_opts */
    mfu_walk_opts_t* walk_opts = mfu_walk_opts_new();

    mfu_debug_level = MFU_LOG_VERBOSE;

    static struct option long_options[] = {
        {"debug",    0, 0, 'd'},
        {"verbose",  0, 0, 'v'},
        {"quiet",    0, 0, 'q'},
        {"help",     0, 0, 'h'},
        {0, 0, 0, 0}
    };

    /* Parse options */
    int usage = 0;
    int help  = 0;
    int c;
    int option_index = 0;
    while ((c = getopt_long(argc, argv, "d:vqh", \
                            long_options, &option_index)) != -1)
    {
        switch (c) {
        case 'd':
            if (strncmp(optarg, "fatal", 5) == 0) {
                mfu_debug_level = MFU_LOG_FATAL;

                if (rank == 0)
                    MFU_LOG(MFU_LOG_INFO,
                              "Debug level set to: fatal");
            }
            else if (strncmp(optarg, "err", 3) == 0) {
                mfu_debug_level = MFU_LOG_ERR;

                if (rank == 0)
                    MFU_LOG(MFU_LOG_INFO,
                              "Debug level set to: "
                              "errors");
            }
            else if (strncmp(optarg, "warn", 4) == 0) {
                mfu_debug_level = MFU_LOG_WARN;

                if (rank == 0)
                    MFU_LOG(MFU_LOG_INFO,
                              "Debug level set to: "
                              "warnings");
            }
            else if (strncmp(optarg, "info", 4) == 0) {
                mfu_debug_level = MFU_LOG_INFO;

                if (rank == 0)
                    MFU_LOG(MFU_LOG_INFO,
                              "Debug level set to: info");
            }
            else if (strncmp(optarg, "dbg", 3) == 0) {
                mfu_debug_level = MFU_LOG_DBG;

                if (rank == 0)
                    MFU_LOG(MFU_LOG_INFO,
                              "Debug level set to: debug");
            }
            else {
                if (rank == 0)
                    MFU_LOG(MFU_LOG_INFO,
                              "Debug level `%s' not "
                              "recognized. Defaulting to "
                              "`info'.", optarg);
            }
        case 'h':
            usage = 1;
            help  = 1;
        case 'v':
            mfu_debug_level = MFU_LOG_VERBOSE;
            break;
        case 'q':
            mfu_debug_level = MFU_LOG_NONE;
            break;
        case '?':
            usage = 1;
            help  = 1;
            break;
        default:
            usage = 1;
            break;
        }
    }

    /* check that user gave us one and only one directory */
    int numargs = argc - optind;
    if (numargs != 1) {
        /* missing the directory, so post a message, and print usage */
        if (rank == 0) {
            MFU_LOG(MFU_LOG_ERR, "You must specify a directory path");
        }
        usage = 1;
    }

    /* print usage and bail if needed */
    if (usage) {
        if (rank == 0) {
            print_usage();
        }
        /* set error code base on whether user requested usage or not */
        if (help) {
            status = 0;
        } else {
            status = -1;
        }
        MPI_Barrier(MPI_COMM_WORLD);
        goto out;
    }

    /* get the directory name */
    const char* dir = argv[optind];

    /* create MPI datatypes */
    MPI_Datatype key;
    MPI_Datatype keysat;
    mpi_type_init(&key, &keysat);

    /* create DTCMP comparison operation */
    DTCMP_Op cmp;
    mtcmp_cmp_init(&cmp);

    /* allocate buffer to read data from file */
    char* chunk_buf = (char*)MFU_MALLOC(DDUP_CHUNK_SIZE);

    /* allocate a file list */
    mfu_flist flist = mfu_flist_new();

    /* Walk the path(s) to build the flist */
    mfu_flist_walk_path(dir, walk_opts, flist);

    /* TODO: spread list among procs? */

    /* get local number of items in flist */
    uint64_t checking_files = mfu_flist_size(flist);

    /* allocate memory to hold SHA256 context values */
    struct file_item* file_items = (struct file_item*) MFU_MALLOC(checking_files * sizeof(*file_items));

    /* Allocate two lists of length size, where each
     * element has (DDUP_KEY_SIZE + 1) uint64_t values
     * (id, checksum, index)
     */
    size_t list_bytes = checking_files * (DDUP_KEY_SIZE + 1) * sizeof(uint64_t);
    uint64_t* list     = (uint64_t*) MFU_MALLOC(list_bytes);
    uint64_t* new_list = (uint64_t*) MFU_MALLOC(list_bytes);

    /* Initialize the list */
    uint64_t* ptr = list;
    uint64_t new_checking_files = 0;
    for (i = 0; i < checking_files; i++) {
        /* check that item is a regular file */
        mode_t mode = (mode_t) mfu_flist_file_get_mode(flist, i);
        if (! S_ISREG(mode)) {
            continue;
        }

        /* get the file size */
        file_size = mfu_flist_file_get_size(flist, i);
        if (file_size == 0) {
            /* Files with size zero are not interesting at all */
            continue;
        }

        /* for first pass, group all files with same file size */
        ptr[0] = file_size;

        /* we'll leave the middle part of the key unset */

        /* record our index in flist */
        ptr[DDUP_KEY_SIZE] = i;

        /* initialize the SHA256 hash state for this file */
        SHA256_Init(&file_items[i].ctx);

        /* increment our file count */
        new_checking_files++;

        /* advance to next spot in the list */
        ptr += DDUP_KEY_SIZE + 1;
    }

    /* reduce our list count based on any files filtered out above */
    checking_files = new_checking_files;

    /* allocate arrays to hold result from DTCMP_Rankv call to
     * assign group and rank values to each item */
    uint64_t output_bytes = checking_files * sizeof(uint64_t);
    uint64_t* group_id    = (uint64_t*) MFU_MALLOC(output_bytes);
    uint64_t* group_ranks = (uint64_t*) MFU_MALLOC(output_bytes);
    uint64_t* group_rank  = (uint64_t*) MFU_MALLOC(output_bytes);

    /* get total number of items across all tasks */
    uint64_t sum_checking_files;
    MPI_Allreduce(&checking_files, &sum_checking_files, 1,
                  MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);

    uint64_t chunk_id = 0;
    while (sum_checking_files > 1) {
        /* update the chunk id we'll read from all files */
        chunk_id++;

        /* iterate over our list and compute SHA256 value for each */
        ptr = list;
        for (i = 0; i < checking_files; i++) {
            /* get the flist index for this item */
            uint64_t idx = ptr[DDUP_KEY_SIZE];

            /* look up file name */
            const char* fname = mfu_flist_file_get_name(flist, idx);

            /* look up file size */
            file_size = mfu_flist_file_get_size(flist, idx);

            /* read a chunk of data from the file into chunk_buf */
            uint64_t data_size;
            status = read_data(fname, chunk_buf, chunk_id,
                               chunk_size, file_size, &data_size);
            if (status) {
                /* File size has been changed, TODO: handle */
                printf("failed to read file %s, maybe file "
                       "size has been modified during the "
                       "process", fname);
            }

            /* update the SHA256 context for this file */
            ctx_ptr = &file_items[idx].ctx;
            SHA256_Update(ctx_ptr, chunk_buf, data_size);

            /*
             * Use SHA256 value as key.
             * This is actually an hack, but SHA256_Final can't
             * be called multiple times with out changing ctx
             */
            SHA256_CTX ctx_tmp;
            memcpy(&ctx_tmp, ctx_ptr, sizeof(ctx_tmp));
            SHA256_Final((unsigned char*)(ptr + 1), &ctx_tmp);

            /* move on to next file in the list */
            ptr += DDUP_KEY_SIZE + 1;
        }

        /* Assign group ids and compute group sizes */
        uint64_t groups;
        DTCMP_Rankv(
            (int)checking_files, list,
            &groups, group_id, group_ranks, group_rank,
            key, keysat, cmp, DTCMP_FLAG_NONE, MPI_COMM_WORLD
        );

        /* any files assigned to a group of size 1 is unique,
         * any files in groups sizes > 1 for which we've read
         * all bytes are the same, and filter all other files
         * into a new list for another iteration */
        new_checking_files = 0;
        ptr = list;
        uint64_t* new_ptr = new_list;
        for (i = 0; i < checking_files; i++) {
            /* Get index into flist for this item */
            uint64_t idx = ptr[DDUP_KEY_SIZE];

            /* look up file name */
            const char* fname = mfu_flist_file_get_name(flist, idx);

            /* look up file size */
            file_size = mfu_flist_file_get_size(flist, idx);

            /* get a pointer to the SHA256 context for this file */
            ctx_ptr = &file_items[idx].ctx;

            if (group_ranks[i] == 1) {
                /*
                 * Only one file in this group,
                 * mfu_flist_file_name(flist, idx) is unique
                 */
            } else if (file_size <= (chunk_id * chunk_size)) {
                /*
                 * We've run out of bytes to checksum, and we
                 * still have a group size > 1
                 * mfu_flist_file_name(flist, idx) is a
                 * duplicate with other files that also have
                 * matching group_id[i]
                 */
                unsigned char digest[SHA256_DIGEST_LENGTH];
                SHA256_Final(digest, ctx_ptr);

                char digest_string[SHA256_DIGEST_LENGTH * 2 + 1];
                dump_sha256_digest(digest_string, digest);
                printf("%s %s\n", fname, digest_string);
            } else {
                /* Have multiple files with the same checksum,
                 * but still have bytes left to read, so keep
                 * this file
                 */

                /* use new group ID to segregate files,
                 * this id will be unique for all files of the
                 * same size and having the same hash up to
                 * this point */
                new_ptr[0] = group_id[i];

                /* Copy over flist index into new list entry */
                new_ptr[DDUP_KEY_SIZE] = idx;

                /* got one more in the new list */
                new_checking_files++;

                /* move on to next item in new list */
                new_ptr += DDUP_KEY_SIZE + 1;

                MFU_LOG(MFU_LOG_DBG, "checking file "
                          "\"%s\" for chunk index %d of size %"
                          PRIu64"\n", fname, (int)chunk_id,
                          chunk_size);
            }

            /* move on to next file in the list */
            ptr += DDUP_KEY_SIZE + 1;
        }

        /* Swap lists */
        uint64_t* tmp_list;
        tmp_list = list;
        list     = new_list;
        new_list = tmp_list;

        /* Update size of current list */
        checking_files = new_checking_files;

        /* Get new global list size */
        MPI_Allreduce(&checking_files, &sum_checking_files, 1,
                      MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
    }

    /* free the walk options */
    mfu_walk_opts_delete(&walk_opts);

    mfu_free(&group_rank);
    mfu_free(&group_ranks);
    mfu_free(&group_id);
    mfu_free(&new_list);
    mfu_free(&list);
    mfu_free(&file_items);
    mfu_free(&chunk_buf);
    mfu_flist_free(&flist);

    mtcmp_cmp_fini(&cmp);
    mpi_type_fini(&key, &keysat);

    status = 0;

out:
    mfu_finalize();
    MPI_Finalize();

    return status;
}
예제 #9
0
/* removes list of items, sets write bits on directories from
 * top-to-bottom, then removes items one level at a time starting
 * from the deepest */
void mfu_flist_unlink(mfu_flist flist)
{
    int level;

    /* wait for all tasks and start timer */
    MPI_Barrier(MPI_COMM_WORLD);
    double start_remove = MPI_Wtime();

    /* split files into separate lists by directory depth */
    int levels, minlevel;
    mfu_flist* lists;
    mfu_flist_array_by_depth(flist, &levels, &minlevel, &lists);

#if 0
    /* dive from shallow to deep, ensure all directories have write bit set */
    for (level = 0; level < levels; level++) {
        /* get list of items for this level */
        mfu_flist list = lists[level];

        /* determine whether we have details at this level */
        int detail = mfu_flist_have_detail(list);

        /* iterate over items and set write bit on directories if needed */
        uint64_t idx;
        uint64_t size = mfu_flist_size(list);
        for (idx = 0; idx < size; idx++) {
            /* check whether we have a directory */
            mfu_filetype type = mfu_flist_file_get_type(list, idx);
            if (type == MFU_TYPE_DIR) {
                /* assume we have to set the bit */
                int set_write_bit = 1;
                if (detail) {
                    mode_t mode = (mode_t) mfu_flist_file_get_mode(list, idx);
                    if (mode & S_IWUSR) {
                        /* we have the mode of the file, and the bit is already set */
                        set_write_bit = 0;
                    }
                }

                /* set the bit if needed */
                if (set_write_bit) {
                    const char* name = mfu_flist_file_get_name(list, idx);
                    int rc = chmod(name, S_IRWXU);
                    if (rc != 0) {
                        MFU_LOG(MFU_LOG_ERR, "Failed to chmod directory `%s' (errno=%d %s)",
                                  name, errno, strerror(errno)
                                 );
                    }
                }
            }
        }

        /* wait for all procs to finish before we start next level */
        MPI_Barrier(MPI_COMM_WORLD);
    }
#endif

    /* now remove files starting from deepest level */
    for (level = levels - 1; level >= 0; level--) {
        double start = MPI_Wtime();

        /* get list of items for this level */
        mfu_flist list = lists[level];

        uint64_t count = 0;
        //remove_direct(list, &count);
        remove_spread(list, &count);
//        remove_map(list, &count);
//        remove_sort(list, &count);
//        remove_libcircle(list, &count);
//        TODO: remove sort w/ spread

        /* wait for all procs to finish before we start
         * with files at next level */
        MPI_Barrier(MPI_COMM_WORLD);

        double end = MPI_Wtime();

        if (mfu_debug_level >= MFU_LOG_VERBOSE) {
            uint64_t min, max, sum;
            MPI_Allreduce(&count, &min, 1, MPI_UINT64_T, MPI_MIN, MPI_COMM_WORLD);
            MPI_Allreduce(&count, &max, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD);
            MPI_Allreduce(&count, &sum, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
            double rate = 0.0;
            if (end - start > 0.0) {
                rate = (double)sum / (end - start);
            }
            double time_diff = end - start;
            if (mfu_rank == 0) {
                printf("level=%d min=%lu max=%lu sum=%lu rate=%f secs=%f\n",
                       (minlevel + level), (unsigned long)min, (unsigned long)max, (unsigned long)sum, rate, time_diff
                      );
                fflush(stdout);
            }
        }
    }

    mfu_flist_array_free(levels, &lists);

    /* wait for all tasks and stop timer */
    MPI_Barrier(MPI_COMM_WORLD);
    double end_remove = MPI_Wtime();

    /* report remove count, time, and rate */
    if (mfu_debug_level >= MFU_LOG_VERBOSE && mfu_rank == 0) {
        uint64_t all_count = mfu_flist_global_size(flist);
        double time_diff = end_remove - start_remove;
        double rate = 0.0;
        if (time_diff > 0.0) {
            rate = ((double)all_count) / time_diff;
        }
        printf("Removed %lu items in %f seconds (%f items/sec)\n",
               all_count, time_diff, rate
              );
    }

    return;
}
예제 #10
0
/* for each depth, sort files by filename and then remove, to test
 * whether it matters to limit the number of directories each process
 * has to reference (e.g., locking) */
static void remove_sort(mfu_flist list, uint64_t* rmcount)
{
    /* bail out if total count is 0 */
    uint64_t all_count = mfu_flist_global_size(list);
    if (all_count == 0) {
        return;
    }

    /* get maximum file name and number of items */
    int chars = (int) mfu_flist_file_max_name(list);
    uint64_t my_count = mfu_flist_size(list);

    /* create key datatype (filename) and comparison op */
    MPI_Datatype dt_key;
    DTCMP_Op op_str;
    DTCMP_Str_create_ascend(chars, &dt_key, &op_str);

    /* create keysat datatype (filename + type) */
    MPI_Datatype types[2], dt_keysat;
    types[0] = dt_key;
    types[1] = MPI_CHAR;
    DTCMP_Type_create_series(2, types, &dt_keysat);

    /* allocate send buffer */
    int sendcount = (int) my_count;
    size_t sendbufsize = (size_t)(sendcount * (chars + 1));
    char* sendbuf = (char*) MFU_MALLOC(sendbufsize);

    /* copy data into buffer */
    char* ptr = sendbuf;
    uint64_t idx;
    for (idx = 0; idx < my_count; idx++) {
        /* encode the filename first */
        const char* name = mfu_flist_file_get_name(list, idx);
        strcpy(ptr, name);
        ptr += chars;

        /* last character encodes item type */
        mfu_filetype type = mfu_flist_file_get_type(list, idx);
        if (type == MFU_TYPE_DIR) {
            ptr[0] = 'd';
        }
        else if (type == MFU_TYPE_FILE || type == MFU_TYPE_LINK) {
            ptr[0] = 'f';
        }
        else {
            ptr[0] = 'u';
        }
        ptr++;
    }

    /* sort items */
    void* recvbuf;
    int recvcount;
    DTCMP_Handle handle;
    DTCMP_Sortz(
        sendbuf, sendcount, &recvbuf, &recvcount,
        dt_key, dt_keysat, op_str, DTCMP_FLAG_NONE, MPI_COMM_WORLD, &handle
    );

    /* delete data */
    int delcount = 0;
    ptr = (char*)recvbuf;
    while (delcount < recvcount) {
        /* get item name */
        char* name = ptr;
        ptr += chars;

        /* get item type */
        char type = ptr[0];
        ptr++;

        /* delete item */
        remove_type(type, name);
        delcount++;
    }

    /* record number of items we deleted */
    *rmcount = (uint64_t) delcount;

    /* free output data */
    DTCMP_Free(&handle);

    /* free our send buffer */
    mfu_free(&sendbuf);

    /* free key comparison operation */
    DTCMP_Op_free(&op_str);

    /* free datatypes */
    MPI_Type_free(&dt_keysat);
    MPI_Type_free(&dt_key);

    return;
}
예제 #11
0
/* for given depth, evenly spread the files among processes for
 * improved load balancing */
static void remove_spread(mfu_flist flist, uint64_t* rmcount)
{
    uint64_t idx;

    /* initialize our remove count */
    *rmcount = 0;

    /* get our rank and number of ranks in job */
    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* allocate memory for alltoall exchanges */
    size_t bufsize = (size_t)ranks * sizeof(int);
    int* sendcounts = (int*) MFU_MALLOC(bufsize);
    int* sendsizes  = (int*) MFU_MALLOC(bufsize);
    int* senddisps  = (int*) MFU_MALLOC(bufsize);
    int* recvsizes  = (int*) MFU_MALLOC(bufsize);
    int* recvdisps  = (int*) MFU_MALLOC(bufsize);

    /* get number of items */
    uint64_t my_count  = mfu_flist_size(flist);
    uint64_t all_count = mfu_flist_global_size(flist);
    uint64_t offset    = mfu_flist_global_offset(flist);

    /* compute number of bytes we'll send */
    size_t sendbytes = 0;
    for (idx = 0; idx < my_count; idx++) {
        const char* name = mfu_flist_file_get_name(flist, idx);
        size_t len = strlen(name) + 2;
        sendbytes += len;
    }

    /* compute the number of items that each rank should have */
    uint64_t low = all_count / (uint64_t)ranks;
    uint64_t extra = all_count - low * (uint64_t)ranks;

    /* compute number that we'll send to each rank and initialize sendsizes and offsets */
    uint64_t i;
    for (i = 0; i < (uint64_t)ranks; i++) {
        /* compute starting element id and count for given rank */
        uint64_t start, num;
        if (i < extra) {
            num = low + 1;
            start = i * num;
        }
        else {
            num = low;
            start = (i - extra) * num + extra * (low + 1);
        }

        /* compute the number of items we'll send to this task */
        uint64_t sendcnt = 0;
        if (my_count > 0) {
            if (start <= offset && offset < start + num) {
                /* this rank overlaps our range,
                 * and its first element comes at or before our first element */
                sendcnt = num - (offset - start);
                if (my_count < sendcnt) {
                    /* the number the rank could receive from us
                     * is more than we have left */
                    sendcnt = my_count;
                }
            }
            else if (offset < start && start < offset + my_count) {
                /* this rank overlaps our range,
                 * and our first element comes strictly before its first element */
                sendcnt = my_count - (start - offset);
                if (num < sendcnt) {
                    /* the number the rank can receive from us
                     * is less than we have left */
                    sendcnt = num;
                }
            }
        }

        /* record the number of items we'll send to this task */
        sendcounts[i]  = (int) sendcnt;

        /* set sizes and displacements to 0, we'll fix this later */
        sendsizes[i] = 0;
        senddisps[i] = 0;
    }

    /* allocate space */
    char* sendbuf = (char*) MFU_MALLOC(sendbytes);

    /* copy data into buffer */
    int dest = -1;
    int disp = 0;
    for (idx = 0; idx < my_count; idx++) {
        /* get name and type of item */
        const char* name = mfu_flist_file_get_name(flist, idx);
        mfu_filetype type = mfu_flist_file_get_type(flist, idx);

        /* get rank that we're packing data for */
        if (dest == -1) {
            dest = get_first_nonzero(sendcounts, ranks);
            if (dest == -1) {
                /* error */
            }
            /* about to copy first item for this rank,
             * record its displacement */
            senddisps[dest] = disp;
        }

        /* identify region to be sent to rank */
        char* path = sendbuf + disp;

        /* first character encodes item type */
        if (type == MFU_TYPE_DIR) {
            path[0] = 'd';
        }
        else if (type == MFU_TYPE_FILE || type == MFU_TYPE_LINK) {
            path[0] = 'f';
        }
        else {
            path[0] = 'u';
        }

        /* now copy in the path */
        strcpy(&path[1], name);

        /* TODO: check that we don't overflow the int */
        /* add bytes to sendsizes and increase displacement */
        size_t count = strlen(name) + 2;
        sendsizes[dest] += (int) count;
        disp += (int) count;

        /* decrement the count for this rank */
        sendcounts[dest]--;
        if (sendcounts[dest] == 0) {
            dest = -1;
        }
    }

    /* compute displacements */
    senddisps[0] = 0;
    for (i = 1; i < (uint64_t)ranks; i++) {
        senddisps[i] = senddisps[i - 1] + sendsizes[i - 1];
    }

    /* alltoall to specify incoming counts */
    MPI_Alltoall(sendsizes, 1, MPI_INT, recvsizes, 1, MPI_INT, MPI_COMM_WORLD);

    /* compute size of recvbuf and displacements */
    size_t recvbytes = 0;
    recvdisps[0] = 0;
    for (i = 0; i < (uint64_t)ranks; i++) {
        recvbytes += (size_t) recvsizes[i];
        if (i > 0) {
            recvdisps[i] = recvdisps[i - 1] + recvsizes[i - 1];
        }
    }

    /* allocate recvbuf */
    char* recvbuf = (char*) MFU_MALLOC(recvbytes);

    /* alltoallv to send data */
    MPI_Alltoallv(
        sendbuf, sendsizes, senddisps, MPI_CHAR,
        recvbuf, recvsizes, recvdisps, MPI_CHAR, MPI_COMM_WORLD
    );

    /* delete data */
    char* item = recvbuf;
    while (item < recvbuf + recvbytes) {
        /* get item name and type */
        char type = item[0];
        char* name = &item[1];

        /* delete item */
        remove_type(type, name);

        /* keep tally of number of items we deleted */
        *rmcount++;

        /* go to next item */
        size_t item_size = strlen(item) + 1;
        item += item_size;
    }

    /* free memory */
    mfu_free(&recvbuf);
    mfu_free(&recvdisps);
    mfu_free(&recvsizes);
    mfu_free(&sendbuf);
    mfu_free(&senddisps);
    mfu_free(&sendsizes);
    mfu_free(&sendcounts);

    return;
}
예제 #12
0
static void mfu_flist_archive_create_libcircle(mfu_flist flist, const char* archivefile, mfu_archive_options_t* opts)
{
    DTAR_flist = flist;
    DTAR_user_opts = *opts;

    MPI_Comm_rank(MPI_COMM_WORLD, &DTAR_rank);

    /* TODO: stripe the archive file if on parallel file system */

    /* init statistics */
    DTAR_statistics.total_dirs  = 0;
    DTAR_statistics.total_files = 0;
    DTAR_statistics.total_links = 0;
    DTAR_statistics.total_size  = 0;
    DTAR_statistics.total_bytes_copied = 0;

    time(&(DTAR_statistics.time_started));
    DTAR_statistics.wtime_started = MPI_Wtime();

    /* create the archive file */
    DTAR_writer.name = archivefile;
    DTAR_writer.flags = O_WRONLY | O_CREAT | O_CLOEXEC | O_LARGEFILE;
    DTAR_writer.fd_tar = open(archivefile, DTAR_writer.flags, 0664);

    /* get number of items in our portion of the list */
    DTAR_count = mfu_flist_size(DTAR_flist);

    /* allocate memory for file sizes and offsets */
    uint64_t* fsizes = (uint64_t*) MFU_MALLOC(DTAR_count * sizeof(uint64_t));
    DTAR_offsets     = (uint64_t*) MFU_MALLOC(DTAR_count * sizeof(uint64_t));

    /* compute local offsets for each item and total
     * bytes we're contributing to the archive */
    uint64_t idx;
    uint64_t offset = 0;
    for (idx = 0; idx < DTAR_count; idx++) {
        /* assume the item takes no space */
        fsizes[idx] = 0;

        /* identify item type to compute its size in the archive */
        mfu_filetype type = mfu_flist_file_get_type(DTAR_flist, idx);
        if (type == MFU_TYPE_DIR || type == MFU_TYPE_LINK) {
            /* directories and symlinks only need the header */
            fsizes[idx] = DTAR_HDR_LENGTH;
        } else if (type == MFU_TYPE_FILE) {
            /* regular file requires a header, plus file content,
             * and things are packed into blocks of 512 bytes */
            uint64_t fsize = mfu_flist_file_get_size(DTAR_flist, idx);

            /* determine whether file size is integer multiple of 512 bytes */
            uint64_t rem = fsize % 512;
            if (rem == 0) {
                /* file content is multiple of 512 bytes, so perfect fit */
                fsizes[idx] = fsize + DTAR_HDR_LENGTH;
            } else {
                /* TODO: check and explain this math */
                fsizes[idx] = (fsize / 512 + 4) * 512;
            }

        }

        /* increment our local offset for this item */
        DTAR_offsets[idx] = offset;
        offset += fsizes[idx];
    }

    /* execute scan to figure our global base offset in the archive file */
    uint64_t global_offset = 0;
    MPI_Scan(&offset, &global_offset, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
    global_offset -= offset;

    /* update offsets for each of our file to their global offset */
    for (idx = 0; idx < DTAR_count; idx++) {
        DTAR_offsets[idx] += global_offset;
    }

    /* create an archive */
    struct archive* ar = archive_write_new();

    archive_write_set_format_pax(ar);

    int r = archive_write_open_fd(ar, DTAR_writer.fd_tar);
    if (r != ARCHIVE_OK) {
        MFU_LOG(MFU_LOG_ERR, "archive_write_open_fd(): %s", archive_error_string(ar));
        DTAR_abort(EXIT_FAILURE);
    }

    /* write headers for our files */
    for (idx = 0; idx < DTAR_count; idx++) {
        mfu_filetype type = mfu_flist_file_get_type(DTAR_flist, idx);
        if (type == MFU_TYPE_FILE || type == MFU_TYPE_DIR || type == MFU_TYPE_LINK) {
            DTAR_write_header(ar, idx, DTAR_offsets[idx]);
        }
    }

    /* prepare libcircle */
    CIRCLE_init(0, NULL, CIRCLE_SPLIT_EQUAL | CIRCLE_CREATE_GLOBAL);
    CIRCLE_loglevel loglevel = CIRCLE_LOG_WARN;
    CIRCLE_enable_logging(loglevel);

    /* register callbacks */
    CIRCLE_cb_create(&DTAR_enqueue_copy);
    CIRCLE_cb_process(&DTAR_perform_copy);

    /* run the libcircle job to copy data into archive file */
    CIRCLE_begin();
    CIRCLE_finalize();

    /* compute total bytes copied */
    uint64_t archive_size = 0;
    MPI_Allreduce(&offset, &archive_size, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
    DTAR_statistics.total_size = archive_size;

    DTAR_statistics.wtime_ended = MPI_Wtime();
    time(&(DTAR_statistics.time_ended));

    /* print stats */
    double rel_time = DTAR_statistics.wtime_ended - \
                      DTAR_statistics.wtime_started;
    if (DTAR_rank == 0) {
        char starttime_str[256];
        struct tm* localstart = localtime(&(DTAR_statistics.time_started));
        strftime(starttime_str, 256, "%b-%d-%Y, %H:%M:%S", localstart);

        char endtime_str[256];
        struct tm* localend = localtime(&(DTAR_statistics.time_ended));
        strftime(endtime_str, 256, "%b-%d-%Y, %H:%M:%S", localend);

        /* add two 512 blocks at the end */
        DTAR_statistics.total_size += 512 * 2;

        /* convert bandwidth to unit */
        double agg_rate_tmp;
        double agg_rate = (double) DTAR_statistics.total_size / rel_time;
        const char* agg_rate_units;
        mfu_format_bytes(agg_rate, &agg_rate_tmp, &agg_rate_units);

        MFU_LOG(MFU_LOG_INFO, "Started:    %s", starttime_str);
        MFU_LOG(MFU_LOG_INFO, "Completed:  %s", endtime_str);
        MFU_LOG(MFU_LOG_INFO, "Total archive size: %" PRIu64, DTAR_statistics.total_size);
        MFU_LOG(MFU_LOG_INFO, "Rate: %.3lf %s " \
                "(%.3" PRIu64 " bytes in %.3lf seconds)", \
                agg_rate_tmp, agg_rate_units, DTAR_statistics.total_size, rel_time);
    }

    /* clean up */
    mfu_free(&fsizes);
    mfu_free(&DTAR_offsets);

    /* close archive file */
    archive_write_free(ar);
    mfu_close(DTAR_writer.name, DTAR_writer.fd_tar);
}
예제 #13
0
static int sort_files_readdir(const char* sortfields, mfu_flist* pflist)
{
    /* get list from caller */
    mfu_flist flist = *pflist;

    /* create a new list as subset of original list */
    mfu_flist flist2 = mfu_flist_subset(flist);

    uint64_t incount = mfu_flist_size(flist);
    uint64_t chars   = mfu_flist_file_max_name(flist);

    /* create datatype for packed file list element */
    MPI_Datatype dt_sat;
    size_t bytes = mfu_flist_file_pack_size(flist);
    MPI_Type_contiguous((int)bytes, MPI_BYTE, &dt_sat);

    /* get our rank and the size of comm_world */
    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* build type for file path */
    MPI_Datatype dt_filepath;
    MPI_Type_contiguous((int)chars, MPI_CHAR, &dt_filepath);
    MPI_Type_commit(&dt_filepath);

    /* build comparison op for filenames */
    DTCMP_Op op_filepath;
    if (DTCMP_Op_create(dt_filepath, my_strcmp, &op_filepath) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create sorting operation for filepath");
    }

    /* build comparison op for filenames */
    DTCMP_Op op_filepath_rev;
    if (DTCMP_Op_create(dt_filepath, my_strcmp_rev, &op_filepath_rev) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create reverse sorting operation for filepath");
    }

    /* TODO: process sort fields */
    const int MAXFIELDS = 1;
    MPI_Datatype types[MAXFIELDS];
    DTCMP_Op ops[MAXFIELDS];
    sort_field fields[MAXFIELDS];
    size_t lengths[MAXFIELDS];
    int nfields = 0;
    for (nfields = 0; nfields < MAXFIELDS; nfields++) {
        types[nfields]   = MPI_DATATYPE_NULL;
        ops[nfields]     = DTCMP_OP_NULL;
    }
    nfields = 0;
    char* sortfields_copy = MFU_STRDUP(sortfields);
    char* token = strtok(sortfields_copy, ",");
    while (token != NULL) {
        int valid = 1;
        if (strcmp(token, "name") == 0) {
            types[nfields]   = dt_filepath;
            ops[nfields]     = op_filepath;
            fields[nfields]  = FILENAME;
            lengths[nfields] = chars;
        }
        else if (strcmp(token, "-name") == 0) {
            types[nfields]   = dt_filepath;
            ops[nfields]     = op_filepath_rev;
            fields[nfields]  = FILENAME;
            lengths[nfields] = chars;
        }
        else {
            /* invalid token */
            valid = 0;
            if (rank == 0) {
                MFU_LOG(MFU_LOG_ERR, "Invalid sort field: %s\n", token);
            }
        }
        if (valid) {
            nfields++;
        }
        if (nfields > MAXFIELDS) {
            /* TODO: print warning if we have too many fields */
            break;
        }
        token = strtok(NULL, ",");
    }
    mfu_free(&sortfields_copy);

    /* build key type */
    MPI_Datatype dt_key;
    if (DTCMP_Type_create_series(nfields, types, &dt_key) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create type for key");
    }

    /* create sort op */
    DTCMP_Op op_key;
    if (DTCMP_Op_create_series(nfields, ops, &op_key) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create sorting operation for key");
    }

    /* build keysat type */
    MPI_Datatype dt_keysat, keysat_types[2];
    keysat_types[0] = dt_key;
    keysat_types[1] = dt_sat;
    if (DTCMP_Type_create_series(2, keysat_types, &dt_keysat) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create type for keysat");
    }

    /* get extent of key type */
    MPI_Aint key_lb, key_extent;
    MPI_Type_get_extent(dt_key, &key_lb, &key_extent);

    /* get extent of keysat type */
    MPI_Aint keysat_lb, keysat_extent;
    MPI_Type_get_extent(dt_keysat, &keysat_lb, &keysat_extent);

    /* get extent of sat type */
    MPI_Aint sat_lb, sat_extent;
    MPI_Type_get_extent(dt_sat, &sat_lb, &sat_extent);

    /* compute size of sort element and allocate buffer */
    size_t sortbufsize = (size_t)keysat_extent * incount;
    void* sortbuf = MFU_MALLOC(sortbufsize);

    /* copy data into sort elements */
    uint64_t idx = 0;
    char* sortptr = (char*) sortbuf;
    while (idx < incount) {
        /* copy in access time */
        int i;
        for (i = 0; i < nfields; i++) {
            if (fields[i] == FILENAME) {
                const char* name = mfu_flist_file_get_name(flist, idx);
                strcpy(sortptr, name);
            }
            sortptr += lengths[i];
        }

        /* pack file element */
        sortptr += mfu_flist_file_pack(sortptr, flist, idx);

        idx++;
    }

    /* sort data */
    void* outsortbuf;
    int outsortcount;
    DTCMP_Handle handle;
    int sort_rc = DTCMP_Sortz(
                      sortbuf, (int)incount, &outsortbuf, &outsortcount,
                      dt_key, dt_keysat, op_key, DTCMP_FLAG_NONE,
                      MPI_COMM_WORLD, &handle
                  );
    if (sort_rc != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to sort data");
    }

    /* step through sorted data filenames */
    idx = 0;
    sortptr = (char*) outsortbuf;
    while (idx < (uint64_t)outsortcount) {
        sortptr += key_extent;
        sortptr += mfu_flist_file_unpack(sortptr, flist2);
        idx++;
    }

    /* build summary of new list */
    mfu_flist_summarize(flist2);

    /* free memory */
    DTCMP_Free(&handle);

    /* free ops */
    DTCMP_Op_free(&op_key);
    DTCMP_Op_free(&op_filepath_rev);
    DTCMP_Op_free(&op_filepath);

    /* free types */
    MPI_Type_free(&dt_keysat);
    MPI_Type_free(&dt_key);
    MPI_Type_free(&dt_filepath);

    /* free input buffer holding sort elements */
    mfu_free(&sortbuf);

    /* free the satellite type */
    MPI_Type_free(&dt_sat);

    /* return new list and free old one */
    *pflist = flist2;
    mfu_flist_free(&flist);

    return MFU_SUCCESS;
}
예제 #14
0
파일: dstripe.c 프로젝트: hpc/fileutils
int main(int argc, char* argv[])
{
    MPI_Init(&argc, &argv);
    mfu_init();

    /* get our rank and number of ranks in the job */
    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* pointer to mfu_walk_opts */
    mfu_walk_opts_t* walk_opts = mfu_walk_opts_new();

    uint64_t idx;
    int option_index = 0;
    int usage = 0;
    int report = 0;
    unsigned int numpaths = 0;
    mfu_param_path* paths = NULL;
    unsigned long long bytes;

    /* verbose by default */
    mfu_debug_level = MFU_LOG_VERBOSE;

    /* default to 1MB stripe size, stripe across all OSTs, and all files are candidates */
    int stripes = -1;
    uint64_t stripe_size = 1048576;
    uint64_t min_size = 0;

    static struct option long_options[] = {
        {"count",    1, 0, 'c'},
        {"size",     1, 0, 's'},
        {"minsize",  1, 0, 'm'},
        {"report",   0, 0, 'r'},
        {"progress", 1, 0, 'P'},
        {"verbose",  0, 0, 'v'},
        {"quiet",    0, 0, 'q'},
        {"help",     0, 0, 'h'},
        {0, 0, 0, 0}
    };

    while (1) {
        int c = getopt_long(argc, argv, "c:s:m:rvqh",
                    long_options, &option_index);

        if (c == -1) {
            break;
        }

        switch (c) {
            case 'c':
                /* stripe count */
                stripes = atoi(optarg);
                break;
            case 's':
                /* stripe size in bytes */
                if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS) {
                    if (rank == 0) {
                        MFU_LOG(MFU_LOG_ERR, "Failed to parse stripe size: %s", optarg);
                    }
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
                stripe_size = (uint64_t)bytes;
                break;
            case 'm':
                /* min file size in bytes */
                if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS) {
                    if (rank == 0) {
                        MFU_LOG(MFU_LOG_ERR, "Failed to parse minimum file size: %s", optarg);
                    }
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
                min_size = (uint64_t)bytes;
                break;
            case 'r':
                /* report striping info */
		report = 1;
                break;
            case 'P':
                mfu_progress_timeout = atoi(optarg);
                break;
            case 'v':
                mfu_debug_level = MFU_LOG_VERBOSE;
                break;
            case 'q':
                mfu_debug_level = MFU_LOG_NONE;
                break;
            case 'h':
                /* display usage */
                usage = 1;
                break;
            case '?':
                /* display usage */
                usage = 1;
                break;
            default:
                if (rank == 0) {
                    printf("?? getopt returned character code 0%o ??\n", c);
                }
        }
    }

    /* check that we got a valid progress value */
    if (mfu_progress_timeout < 0) {
        if (rank == 0) {
            MFU_LOG(MFU_LOG_ERR, "Seconds in --progress must be non-negative: %d invalid", mfu_progress_timeout);
        }
        usage = 1;
    }

    /* paths to walk come after the options */
    if (optind < argc) {
        /* determine number of paths specified by user */
        numpaths = argc - optind;

        /* allocate space for each path */
        paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path));

        /* process each path */
        char** p = &argv[optind];
        mfu_param_path_set_all((uint64_t)numpaths, (const char**)p, paths);
        optind += numpaths;
    } else {
        usage = 1;
    }

    /* if we need to print usage, print it and exit */
    if (usage) {
        if (rank == 0) {
            print_usage();
        }

        mfu_finalize();
        MPI_Finalize();
        return 1;
    }

    /* nothing to do if lustre support is disabled */
#ifndef LUSTRE_SUPPORT
    if (rank == 0) {
        MFU_LOG(MFU_LOG_ERR, "Lustre support is disabled.");
    }
    MPI_Abort(MPI_COMM_WORLD, 1);
#endif

    /* stripe count must be -1 for all available or greater than 0 */
    if (stripes < -1) {
        if (rank == 0) {
            MFU_LOG(MFU_LOG_ERR, "Stripe count must be -1 for all servers, 0 for lustre file system default, or a positive value");
        }
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* lustre requires stripe sizes to be aligned */
    if (stripe_size > 0 && stripe_size % 65536 != 0) {
        if (rank == 0) {
            MFU_LOG(MFU_LOG_ERR, "Stripe size must be a multiple of 65536");
        }
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* TODO: verify that source / target are on Lustre */

    /* walk list of input paths and stat as we walk */
    mfu_flist flist = mfu_flist_new();
    mfu_flist_walk_param_paths(numpaths, paths, walk_opts, flist);

    /* filter down our list to files which don't meet our striping requirements */
    mfu_flist filtered = filter_list(flist, stripes, stripe_size, min_size, &create_prog_count_total, &stripe_prog_bytes_total);
    mfu_flist_free(&flist);

    MPI_Barrier(MPI_COMM_WORLD);

    /* report the file size and stripe count of all files we found */
    if (report) {
        /* report the files in our filtered list */
        stripe_info_report(filtered);

        /* free the paths and our list */
        mfu_flist_free(&filtered);
        mfu_param_path_free_all(numpaths, paths);
        mfu_free(&paths);

        /* finalize */
        mfu_finalize();
        MPI_Finalize();
        return 0;
    }

    /* generate a global suffix for our temp files and have each node check it's list */
    char suffix[8];
    uint64_t retry;

    /* seed our random number generator */
    srand(time(NULL));

    /* keep trying to make a valid random suffix...*/
    do {
        uint64_t attempt = 0;

        /* make rank 0 responsible for generating a random suffix */
        if (rank == 0) {
            generate_suffix(suffix, sizeof(suffix));
        }

        /* broadcast the random suffix to all ranks */
        MPI_Bcast(suffix, sizeof(suffix), MPI_CHAR, 0, MPI_COMM_WORLD);

        /* check that the file doesn't already exist */
        uint64_t size = mfu_flist_size(filtered);
        for (idx = 0; idx < size; idx++) {
            char temp_path[PATH_MAX];
            strcpy(temp_path, mfu_flist_file_get_name(filtered, idx));
            strcat(temp_path, suffix);
            if(!mfu_access(temp_path, F_OK)) {
                /* the file already exists */
                attempt = 1;
                break;
            }
        }

        /* do a reduce to figure out if a rank has a file collision */
        MPI_Allreduce(&attempt, &retry, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD);
    } while(retry != 0);

    /* initialize progress messages while creating files */
    create_prog_count = 0;
    create_prog = mfu_progress_start(mfu_progress_timeout, 1, MPI_COMM_WORLD, create_progress_fn);

    /* create new files so we can restripe */
    uint64_t size = mfu_flist_size(filtered);
    for (idx = 0; idx < size; idx++) {
        char temp_path[PATH_MAX];
        strcpy(temp_path, mfu_flist_file_get_name(filtered, idx));
        strcat(temp_path, suffix);

        /* create a striped file at the temp file path */
        mfu_stripe_set(temp_path, stripe_size, stripes);

        /* update our status for file create progress */
        create_prog_count++;
        mfu_progress_update(&create_prog_count, create_prog);
    }

    /* finalize file create progress messages */
    mfu_progress_complete(&create_prog_count, &create_prog);

    MPI_Barrier(MPI_COMM_WORLD);

    /* initialize progress messages while copying data */
    stripe_prog_bytes = 0;
    stripe_prog = mfu_progress_start(mfu_progress_timeout, 1, MPI_COMM_WORLD, stripe_progress_fn);

    /* found a suffix, now we need to break our files into chunks based on stripe size */
    mfu_file_chunk* file_chunks = mfu_file_chunk_list_alloc(filtered, stripe_size);
    mfu_file_chunk* p = file_chunks;
    while (p != NULL) {
        /* build path to temp file */
        char temp_path[PATH_MAX];
        strcpy(temp_path, p->name);
        strcat(temp_path, suffix);

        /* write each chunk in our list */
        write_file_chunk(p, temp_path);

        /* move on to next file chunk */
        p = p->next;
    }
    mfu_file_chunk_list_free(&file_chunks);

    /* finalize progress messages */
    mfu_progress_complete(&stripe_prog_bytes, &stripe_prog);

    MPI_Barrier(MPI_COMM_WORLD);

    /* remove input file and rename temp file */
    for (idx = 0; idx < size; idx++) {
        /* build path to temp file */
        const char *in_path = mfu_flist_file_get_name(filtered, idx);
        char out_path[PATH_MAX];
        strcpy(out_path, in_path);
        strcat(out_path, suffix);

        /* change the mode of the newly restriped file to be the same as the old one */
        mode_t mode = (mode_t) mfu_flist_file_get_mode(filtered, idx);
        if (mfu_chmod(out_path, mode) != 0) {
            MFU_LOG(MFU_LOG_ERR, "Failed to chmod file %s (%s)", out_path, strerror(errno));
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* rename the new, restriped file to the old name */
        if (rename(out_path, in_path) != 0) {
            MFU_LOG(MFU_LOG_ERR, "Failed to rename file %s to %s", out_path, in_path);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
    }

    /* wait for everyone to finish */
    MPI_Barrier(MPI_COMM_WORLD);

    /* free the walk options */
    mfu_walk_opts_delete(&walk_opts);

    /* free filtered list, path parameters */
    mfu_flist_free(&filtered);
    mfu_param_path_free_all(numpaths, paths);
    mfu_free(&paths);

    mfu_finalize();
    MPI_Finalize();

    return 0;
}
예제 #15
0
파일: dwalk.c 프로젝트: hpc/fileutils
static int print_flist_distribution(int file_histogram,
                                    struct distribute_option *option,
                                    mfu_flist* pflist, int rank)
{
    /* file list to use */
    mfu_flist flist = *pflist;

    /* get local size for each rank, and max file sizes */
    uint64_t size = mfu_flist_size(flist);
    uint64_t global_max_file_size;

    int separators = 0;
    if (file_histogram) {
        /* create default separators */
        create_default_separators(option, &flist, &size, &separators,
                                  &global_max_file_size);
    } else {
        separators = option->separator_number;
    }

    /* allocate a count for each bin, initialize the bin counts to 0
     * it is separator + 1 because the last bin is the last separator
     * to the DISTRIBUTE_MAX */
    uint64_t* dist = (uint64_t*) MFU_MALLOC((separators + 1) * sizeof(uint64_t));

    /* initialize the bin counts to 0 */
    for (int i = 0; i <= separators; i++) {
        dist[i] = 0;
    }

    /* for each file, identify appropriate bin and increment its count */
    for (int i = 0; i < size; i++) {
         /* get the size of the file */
         uint64_t file_size = mfu_flist_file_get_size(flist, i);

         /* loop through the bins and find the one the file belongs to,
          * set last bin to -1, if a bin is not found while looping through the
          * list of file size separators, then it belongs in the last bin
          * so (last file size - MAX bin) */
         int max_bin_flag = -1;
         for (int j = 0; j < separators; j++) {
             if (file_size <= option->separators[j]) {
                 /* found the bin set bin index & increment its count */
                 dist[j]++;

                 /* a file for this bin was found so can't belong to
                  * last bin (so set the flag) & exit the loop */
                 max_bin_flag = 1;
                 break;
             }
         }

         /* if max_bin_flag is still -1 then the file belongs to the last bin */
         if (max_bin_flag < 0) {
             dist[separators]++;
         }
    }

    /* get the total sum across all of the bins */
    uint64_t* disttotal = (uint64_t*) MFU_MALLOC((separators + 1) * sizeof(uint64_t));
    MPI_Allreduce(dist, disttotal, (uint64_t)separators + 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);

    /* Print the file distribution */
    if (rank == 0) {
         /* number of files in a bin */
         uint64_t number;
         double size_tmp;
         const char* size_units;
         printf("%-27s %s\n", "Range", "Number");
         for (int i = 0; i <= separators; i++) {
             printf("%s", "[ ");
             if (i == 0) {
                 printf("%7.3lf %2s", 0.000, "B");
             } else {
                 mfu_format_bytes((uint64_t)option->separators[i - 1],
                                  &size_tmp, &size_units);
                 printf("%7.3lf %2s", size_tmp, size_units);
             }

             printf("%s", " - ");

             if (file_histogram) {
                mfu_format_bytes((uint64_t)option->separators[i],
                                 &size_tmp, &size_units);
                number = disttotal[i];
                mfu_format_bytes((uint64_t)option->separators[i],
                                 &size_tmp, &size_units);
                printf("%7.3lf %2s ) %"PRIu64"\n", size_tmp,
                       size_units, number);
             } else {
                if (i == separators) {
                    number = disttotal[i];
                    printf("%10s ) %"PRIu64"\n", "MAX", number);
                } else {
                    number = disttotal[i];
                    mfu_format_bytes((uint64_t)option->separators[i],
                              &size_tmp, &size_units);
                    printf("%7.3lf %2s ) %"PRIu64"\n", size_tmp, size_units, number);
                }
             }
         }
    }

    /* free the memory used to hold bin counts */
    mfu_free(&disttotal);
    mfu_free(&dist);

    return 0;
}
예제 #16
0
/* for given depth, hash directory name and map to processes to
 * test whether having all files in same directory on one process
 * matters */
size_t mfu_flist_distribute_map(mfu_flist list, char** buffer,
                                  mfu_flist_name_encode_fn encode,
                                  mfu_flist_map_fn map, void* args)
{
    uint64_t idx;

    /* get our rank and number of ranks in job */
    int ranks;
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* allocate arrays for alltoall */
    size_t bufsize = (size_t)ranks * sizeof(int);
    int* sendsizes  = (int*) MFU_MALLOC(bufsize);
    int* senddisps  = (int*) MFU_MALLOC(bufsize);
    int* sendoffset = (int*) MFU_MALLOC(bufsize);
    int* recvsizes  = (int*) MFU_MALLOC(bufsize);
    int* recvdisps  = (int*) MFU_MALLOC(bufsize);

    /* initialize sendsizes and offsets */
    int i;
    for (i = 0; i < ranks; i++) {
        sendsizes[i]  = 0;
        sendoffset[i] = 0;
    }

    /* compute number of bytes we'll send to each rank */
    size_t sendbytes = 0;
    uint64_t size = mfu_flist_size(list);
    for (idx = 0; idx < size; idx++) {
        int dest = map(list, idx, ranks, args);

        /* TODO: check that pack size doesn't overflow int */
        /* total number of bytes we'll send to each rank and the total overall */
        size_t count = encode(NULL, list, idx, args);
        sendsizes[dest] += (int) count;
        sendbytes += count;
    }

    /* compute displacements */
    senddisps[0] = 0;
    for (i = 1; i < ranks; i++) {
        senddisps[i] = senddisps[i - 1] + sendsizes[i - 1];
    }

    /* allocate space */
    char* sendbuf = (char*) MFU_MALLOC(sendbytes);

    /* copy data into buffer */
    for (idx = 0; idx < size; idx++) {
        int dest = map(list, idx, ranks, args);

        /* identify region to be sent to rank */
        char* path = sendbuf + senddisps[dest] + sendoffset[dest];
        size_t count = encode(path, list, idx, args);

        /* TODO: check that pack size doesn't overflow int */
        /* bump up the offset for this rank */
        sendoffset[dest] += (int) count;
    }

    /* alltoall to specify incoming counts */
    MPI_Alltoall(sendsizes, 1, MPI_INT, recvsizes, 1, MPI_INT, MPI_COMM_WORLD);

    /* compute size of recvbuf and displacements */
    size_t recvbytes = 0;
    recvdisps[0] = 0;
    for (i = 0; i < ranks; i++) {
        recvbytes += (size_t) recvsizes[i];
        if (i > 0) {
            recvdisps[i] = recvdisps[i - 1] + recvsizes[i - 1];
        }
    }

    /* allocate recvbuf */
    char* recvbuf = (char*) MFU_MALLOC(recvbytes);

    /* alltoallv to send data */
    MPI_Alltoallv(
        sendbuf, sendsizes, senddisps, MPI_CHAR,
        recvbuf, recvsizes, recvdisps, MPI_CHAR, MPI_COMM_WORLD
    );

    /* free memory */
    mfu_free(&recvdisps);
    mfu_free(&recvsizes);
    mfu_free(&sendbuf);
    mfu_free(&sendoffset);
    mfu_free(&senddisps);
    mfu_free(&sendsizes);

    *buffer = recvbuf;
    return recvbytes;
}
예제 #17
0
/* given an input flist, return a newly allocated flist consisting of 
 * a filtered set by finding all items that match/don't match a given
 * regular expression */
mfu_flist mfu_flist_filter_regex(mfu_flist flist, const char* regex_exp, int exclude, int name)
{
    /* create our list to return */
    mfu_flist dest = mfu_flist_subset(flist);

    /* check if user passed in an expression, if so then filter the list */
    if (regex_exp != NULL) {
        /* compile regular expression, if it fails print error */
        regex_t regex;
        int regex_return = regcomp(&regex, regex_exp, 0);
        if (regex_return) {
            MFU_ABORT(-1, "Could not compile regex: `%s' rc=%d\n", regex_exp, regex_return);
        }

        /* copy the things that don't or do (based on input) match the regex into a
         * filtered list */
        uint64_t idx = 0;
        uint64_t size = mfu_flist_size(flist);
        while (idx < size) {
            /* get full path of item */
            const char* file_name = mfu_flist_file_get_name(flist, idx);

            /* get basename of item (exclude the path) */
            mfu_path* pathname = mfu_path_from_str(file_name);
            mfu_path_basename(pathname);
            char* base = mfu_path_strdup(pathname);

            /* execute regex on item, either against the basename or
             * the full path depending on name flag */
            if (name) {
                /* run regex on basename */
                regex_return = regexec(&regex, base, 0, NULL, 0);
            }
            else {
                /* run regex on full path */
                regex_return = regexec(&regex, file_name, 0, NULL, 0);
            }

            /* copy item to the filtered list */
            if (exclude) {
                /* user wants to exclude items that match, so copy everything that
                 * does not match */
                if (regex_return == REG_NOMATCH) {
                    mfu_flist_file_copy(flist, idx, dest);
                }
            }
            else {
                /* user wants to copy over any matching items */
                if (regex_return == 0) {
                    mfu_flist_file_copy(flist, idx, dest);
                }
            }

            /* free the basename */
            mfu_free(&base);
            mfu_path_delete(&pathname);

            /* get next item in our list */
            idx++;
        }

        /* summarize the filtered list */
        mfu_flist_summarize(dest);
    }

    /* return the filtered list */
    return dest;
}
예제 #18
0
/* given an input list and a map function pointer, call map function
 * for each item in list, identify new rank to send item to and then
 * exchange items among ranks and return new output list */
mfu_flist mfu_flist_remap(mfu_flist list, mfu_flist_map_fn map, const void* args)
{
    uint64_t idx;

    /* create new list as subset (actually will be a remapping of
     * input list */
    mfu_flist newlist = mfu_flist_subset(list);

    /* get our rank and number of ranks in job */
    int ranks;
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* allocate arrays for alltoall */
    size_t bufsize = (size_t)ranks * sizeof(int);
    int* sendsizes  = (int*) MFU_MALLOC(bufsize);
    int* senddisps  = (int*) MFU_MALLOC(bufsize);
    int* sendoffset = (int*) MFU_MALLOC(bufsize);
    int* recvsizes  = (int*) MFU_MALLOC(bufsize);
    int* recvdisps  = (int*) MFU_MALLOC(bufsize);

    /* initialize sendsizes and offsets */
    int i;
    for (i = 0; i < ranks; i++) {
        sendsizes[i]  = 0;
        sendoffset[i] = 0;
    }

    /* get number of elements in our local list */
    uint64_t size = mfu_flist_size(list);

    /* allocate space to record file-to-rank mapping */
    int* file2rank = (int*) MFU_MALLOC(size * sizeof(int));

    /* call map function for each item to identify its new rank,
     * and compute number of bytes we'll send to each rank */
    size_t sendbytes = 0;
    for (idx = 0; idx < size; idx++) {
        /* determine which rank we'll map this file to */
        int dest = map(list, idx, ranks, args);

        /* cache mapping so we don't have to compute it again
         * below while packing items for send */
        file2rank[idx] = dest;

        /* TODO: check that pack size doesn't overflow int */
        /* total number of bytes we'll send to each rank and the total overall */
        size_t count = mfu_flist_file_pack_size(list);
        sendsizes[dest] += (int) count;
        sendbytes += count;
    }

    /* compute send buffer displacements */
    senddisps[0] = 0;
    for (i = 1; i < ranks; i++) {
        senddisps[i] = senddisps[i - 1] + sendsizes[i - 1];
    }

    /* allocate space for send buffer */
    char* sendbuf = (char*) MFU_MALLOC(sendbytes);

    /* copy data into send buffer */
    for (idx = 0; idx < size; idx++) {
        /* determine which rank we mapped this file to */
        int dest = file2rank[idx];

        /* get pointer into send buffer and pack item */
        char* ptr = sendbuf + senddisps[dest] + sendoffset[dest];
        size_t count = mfu_flist_file_pack(ptr, list, idx);

        /* TODO: check that pack size doesn't overflow int */
        /* bump up the offset for this rank */
        sendoffset[dest] += (int) count;
    }

    /* alltoall to get our incoming counts */
    MPI_Alltoall(sendsizes, 1, MPI_INT, recvsizes, 1, MPI_INT, MPI_COMM_WORLD);

    /* compute size of recvbuf and displacements */
    size_t recvbytes = 0;
    recvdisps[0] = 0;
    for (i = 0; i < ranks; i++) {
        recvbytes += (size_t) recvsizes[i];
        if (i > 0) {
            recvdisps[i] = recvdisps[i - 1] + recvsizes[i - 1];
        }
    }

    /* allocate recvbuf */
    char* recvbuf = (char*) MFU_MALLOC(recvbytes);

    /* alltoallv to send data */
    MPI_Alltoallv(
        sendbuf, sendsizes, senddisps, MPI_CHAR,
        recvbuf, recvsizes, recvdisps, MPI_CHAR, MPI_COMM_WORLD
    );

    /* unpack items into new list */
    char* ptr = recvbuf;
    char* recvend = recvbuf + recvbytes;
    while (ptr < recvend) {
        size_t count = mfu_flist_file_unpack(ptr, newlist);
        ptr += count;
    }
    mfu_flist_summarize(newlist);

    /* free memory */
    mfu_free(&file2rank);
    mfu_free(&recvbuf);
    mfu_free(&recvdisps);
    mfu_free(&recvsizes);
    mfu_free(&sendbuf);
    mfu_free(&sendoffset);
    mfu_free(&senddisps);
    mfu_free(&sendsizes);

    /* return list to caller */
    return newlist;
}
예제 #19
0
/* given a list of files print from the start to end of the list */
void mfu_flist_print(mfu_flist flist)
{
    /* number of items to print from start and end of list */
    uint64_t range = 10;

    /* allocate send and receive buffers */
    size_t pack_size = mfu_flist_file_pack_size(flist);
    size_t bufsize = 2 * range * pack_size;
    void* sendbuf = MFU_MALLOC(bufsize);
    void* recvbuf = MFU_MALLOC(bufsize);

    /* get our rank and the size of comm_world */
    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* identify the number of items we have, the total number,
     * and our offset in the global list */
    uint64_t count  = mfu_flist_size(flist);
    uint64_t total  = mfu_flist_global_size(flist);
    uint64_t offset = mfu_flist_global_offset(flist);

    /* count the number of items we'll send */
    int num = 0;
    uint64_t idx = 0;
    while (idx < count) {
        uint64_t global = offset + idx;
        if (global < range || (total - global) <= range) {
            num++;
        }
        idx++;
    }

    /* allocate arrays to store counts and displacements */
    int* counts = (int*) MFU_MALLOC((size_t)ranks * sizeof(int));
    int* disps  = (int*) MFU_MALLOC((size_t)ranks * sizeof(int));

    /* tell rank 0 where the data is coming from */
    int bytes = num * (int)pack_size;
    MPI_Gather(&bytes, 1, MPI_INT, counts, 1, MPI_INT, 0, MPI_COMM_WORLD);

    /* pack items into sendbuf */
    idx = 0;
    char* ptr = (char*) sendbuf;
    while (idx < count) {
        uint64_t global = offset + idx;
        if (global < range || (total - global) <= range) {
            ptr += mfu_flist_file_pack(ptr, flist, idx);
        }
        idx++;
    }

    /* compute displacements and total bytes */
    int recvbytes = 0;
    if (rank == 0) {
        int i;
        disps[0] = 0;
        recvbytes += counts[0];
        for (i = 1; i < ranks; i++) {
            disps[i] = disps[i - 1] + counts[i - 1];
            recvbytes += counts[i];
        }
    }

    /* gather data to rank 0 */
    MPI_Gatherv(sendbuf, bytes, MPI_BYTE, recvbuf, counts, disps, MPI_BYTE, 0, MPI_COMM_WORLD);

    /* create temporary list to unpack items into */
    mfu_flist tmplist = mfu_flist_subset(flist);

    /* unpack items into new list */
    if (rank == 0) {
        ptr = (char*) recvbuf;
        char* end = ptr + recvbytes;
        while (ptr < end) {
            mfu_flist_file_unpack(ptr, tmplist);
            ptr += pack_size;
        }
    }

    /* summarize list */
    mfu_flist_summarize(tmplist);

    /* print files */
    if (rank == 0) {
        printf("\n");
        uint64_t tmpidx = 0;
        uint64_t tmpsize = mfu_flist_size(tmplist);
        while (tmpidx < tmpsize) {
            print_file(tmplist, tmpidx);
            tmpidx++;
            if (tmpidx == range && total > 2 * range) {
                /* going to have to leave some out */
                printf("\n<snip>\n\n");
            }
        }
        printf("\n");
    }

    /* free our temporary list */
    mfu_flist_free(&tmplist);

    /* free memory */
    mfu_free(&disps);
    mfu_free(&counts);
    mfu_free(&sendbuf);
    mfu_free(&recvbuf);

    return;
}