/* Duplicate the element src and insert the duplicate into flist.
 * The file name gets its own allocation via MFU_STRDUP; all other
 * fields listed below are copied by value. */
static void list_insert_copy(flist_t* flist, elem_t* src)
{
    /* storage for the duplicate */
    elem_t* copy = (elem_t*) MFU_MALLOC(sizeof(elem_t));

    /* name is the only field that needs a deep copy */
    copy->file = MFU_STRDUP(src->file);

    /* classification fields */
    copy->depth  = src->depth;
    copy->type   = src->type;
    copy->detail = src->detail;

    /* ownership and permissions */
    copy->mode = src->mode;
    copy->uid  = src->uid;
    copy->gid  = src->gid;

    /* timestamps, seconds and nanoseconds */
    copy->atime      = src->atime;
    copy->atime_nsec = src->atime_nsec;
    copy->mtime      = src->mtime;
    copy->mtime_nsec = src->mtime_nsec;
    copy->ctime      = src->ctime;
    copy->ctime_nsec = src->ctime_nsec;

    /* size */
    copy->size = src->size;

    /* hand the duplicate over to the list */
    mfu_flist_insert_elem(flist, copy);
}
/* Predicate that runs a shell command for the item at index idx.
 *
 * arg is the command template (a NUL-terminated string).  If the template
 * contains "{}", the first occurrence is replaced by the item's name as
 * returned by mfu_flist_file_get_name.  If it contains no "{}", the name
 * is appended directly to the template (this matches what the previous
 * code produced, minus the NULL it passed to "%s").
 *
 * Returns 1 if system() returned 0, 0 if system() returned non-zero,
 * and -1 if the expanded command does not fit in the internal buffer. */
int MFU_PRED_EXEC (mfu_flist flist, uint64_t idx, void* arg)
{
    /* capacity of the expanded command line, including the NUL */
    const size_t argmax = 1024 * 1024;

    /* work on a private copy because the template is split in place */
    char* command = MFU_STRDUP((char*) arg);
    char* cmdline = (char*) MFU_MALLOC(argmax);

    /* text following "{}"; stays empty when there is no placeholder,
     * so a NULL pointer is never handed to "%s" */
    const char* suffix = "";
    char* subst = strstr(command, "{}");
    if (subst != NULL) {
        /* terminate the prefix and point just past the "{}" */
        subst[0] = '\0';
        suffix = subst + 2;
    }

    const char* name = mfu_flist_file_get_name(flist, idx);

    /* snprintf returns the length it wanted to write; a value >= the
     * buffer size means the output was truncated, negative means error */
    int written = snprintf(cmdline, argmax, "%s%s%s", command, name, suffix);
    if (written < 0 || (size_t) written >= argmax) {
        fprintf(stderr, "argument %s to exec too long.\n", cmdline);
        mfu_free(&cmdline);
        mfu_free(&command);
        return -1;
    }

    int ret = system(cmdline);

    mfu_free(&cmdline);
    mfu_free(&command);

    /* the predicate holds when the command reported success */
    return ret ? 0 : 1;
}
/* Look up the element at position idx in flist.
 * The first call builds flist->list_index, an array holding a pointer
 * to each linked-list element; later calls reuse it.
 * Returns NULL when idx is not below flist->list_count. */
static elem_t* list_get_elem(flist_t* flist, uint64_t idx)
{
    const uint64_t count = flist->list_count;

    /* build the lookup table if it does not exist yet */
    if (flist->list_index == NULL) {
        /* one pointer slot per element */
        size_t table_bytes = count * sizeof(elem_t*);
        flist->list_index = (elem_t**) MFU_MALLOC(table_bytes);

        /* walk the linked list once, recording each node */
        uint64_t pos;
        elem_t* node;
        for (pos = 0, node = flist->list_head;
             pos < count && node != NULL;
             pos++, node = node->next)
        {
            flist->list_index[pos] = node;
        }
    }

    /* out of range */
    if (idx >= count) {
        return NULL;
    }

    return flist->list_index[idx];
}
/* Split srclist into one list per depth level.
 *
 * Outputs (all three pointers must be non-NULL, otherwise nothing is done):
 *   outlevels - number of levels (max depth - min depth + 1), 0 if the
 *               global list is empty
 *   outmin    - minimum depth, -1 if the global list is empty
 *   outlists  - newly allocated array of outlevels lists, NULL if empty;
 *               entry k holds the local items whose depth is min + k */
void mfu_flist_array_by_depth(
    mfu_flist srclist,
    int* outlevels,
    int* outmin,
    mfu_flist** outlists)
{
    /* refuse to run without somewhere to store results */
    if (outlevels == NULL || outmin == NULL || outlists == NULL) {
        return;
    }

    /* defaults reported for an empty list */
    *outlevels = 0;
    *outmin    = -1;
    *outlists  = NULL;

    /* nothing to split if no process has any item */
    if (mfu_flist_global_size(srclist) == 0) {
        return;
    }

    /* depth range determines how many lists we need */
    int min_depth = mfu_flist_min_depth(srclist);
    int max_depth = mfu_flist_max_depth(srclist);
    int nlevels   = max_depth - min_depth + 1;

    /* allocate the array and create an empty subset list per level */
    mfu_flist* per_level = (mfu_flist*) MFU_MALLOC((size_t)nlevels * sizeof(mfu_flist));
    int level;
    for (level = 0; level < nlevels; level++) {
        per_level[level] = mfu_flist_subset(srclist);
    }

    /* route each local item to the list matching its depth */
    uint64_t count = mfu_flist_size(srclist);
    uint64_t item;
    for (item = 0; item < count; item++) {
        int depth = mfu_flist_file_get_depth(srclist, item);
        mfu_flist_file_copy(srclist, item, per_level[depth - min_depth]);
    }

    /* finalize every level list */
    for (level = 0; level < nlevels; level++) {
        mfu_flist_summarize(per_level[level]);
    }

    /* publish results */
    *outlevels = nlevels;
    *outmin    = min_depth;
    *outlists  = per_level;
}
/* Unpack one element from buf with list_elem_unpack2, insert it into
 * the list behind the handle bflist, and return the value that
 * list_elem_unpack2 returned. */
size_t mfu_flist_file_unpack(const void* buf, mfu_flist bflist)
{
    /* fresh element to receive the unpacked fields */
    elem_t* item = (elem_t*) MFU_MALLOC(sizeof(elem_t));
    size_t consumed = list_elem_unpack2(buf, item);

    /* the handle is a flist_t* underneath */
    mfu_flist_insert_elem((flist_t*) bflist, item);

    return consumed;
}
/* look up mtimes for specified file, * return secs/nsecs in newly allocated mfu_pred_times struct, * return NULL on error */ static mfu_pred_times* get_mtimes(const char* file) { mfu_param_path param_path; mfu_param_path_set(file, ¶m_path); if (! param_path.path_stat_valid) { return NULL; } mfu_pred_times* t = (mfu_pred_times*) MFU_MALLOC(sizeof(mfu_pred_times)); mfu_stat_get_mtimes(¶m_path.path_stat, &t->secs, &t->nsecs); mfu_param_path_free(¶m_path); return t; }
static int lustre_mds_stat(int fd, char* fname, struct stat* sb) { /* allocate a buffer */ size_t pathlen = strlen(fname) + 1; size_t bufsize = pathlen; //size_t datasize = sizeof(lstat_t) + lov_user_md_size(LOV_MAX_STRIPE_COUNT, LOV_USER_MAGIC_V3); size_t datasize = sizeof(struct lov_user_mds_data) + LOV_MAX_STRIPE_COUNT * sizeof(struct lov_user_ost_data_v1); if (datasize > bufsize) { bufsize = datasize; } char* buf = (char*) MFU_MALLOC(bufsize); /* Usage: ioctl(fd, IOC_MDC_GETFILEINFO, buf) * IN: fd open file descriptor of file's parent directory * IN: buf file name (no path) * OUT: buf lstat_t */ strcpy(buf, fname); // strncpy(buf, fname, bufsize); int ret = ioctl(fd, IOC_MDC_GETFILEINFO, buf); /* Copy lstat_t to struct stat */ if (ret != -1) { lstat_t* ls = (lstat_t*) & ((struct lov_user_mds_data*) buf)->lmd_st; sb->st_dev = ls->st_dev; sb->st_ino = ls->st_ino; sb->st_mode = ls->st_mode; sb->st_nlink = ls->st_nlink; sb->st_uid = ls->st_uid; sb->st_gid = ls->st_gid; sb->st_rdev = ls->st_rdev; sb->st_size = ls->st_size; sb->st_blksize = ls->st_blksize; sb->st_blocks = ls->st_blocks; sb->st_atime = ls->st_atime; sb->st_mtime = ls->st_mtime; sb->st_ctime = ls->st_ctime; lustre_stripe_info(buf); } else { MFU_LOG(MFU_LOG_ERR, "ioctl fd=%d (errno=%d %s)", fd, errno, strerror(errno)); } /* free the buffer */ mfu_free(&buf); return ret; }
/** * Parse the source and destination paths that the user has provided. */ void DCOPY_parse_path_args(char** argv, \ int optind_local, \ int argc) { /* compute number of paths and index of last argument */ int num_args = argc - optind_local; int last_arg_index = num_args + optind_local - 1; /* we need to have at least two paths, * one or more sources and one destination */ if(argv == NULL || num_args < 2) { if(DCOPY_global_rank == 0) { DCOPY_print_usage(); MFU_LOG(MFU_LOG_ERR, "You must specify a source and destination path"); } MPI_Barrier(MPI_COMM_WORLD); DCOPY_exit(EXIT_FAILURE); } /* determine number of source paths */ src_params = NULL; num_src_params = last_arg_index - optind_local; /* allocate space to record info about each source */ size_t src_params_bytes = ((size_t) num_src_params) * sizeof(mfu_param_path); src_params = (mfu_param_path*) MFU_MALLOC(src_params_bytes); /* record standardized paths and stat info for each source */ int opt_index; for(opt_index = optind_local; opt_index < last_arg_index; opt_index++) { char* path = argv[opt_index]; int idx = opt_index - optind_local; mfu_param_path_set(path, &src_params[idx]); } /* standardize destination path */ const char* dstpath = argv[last_arg_index]; mfu_param_path_set(dstpath, &dest_param); /* copy the destination path to user opts structure */ DCOPY_user_opts.dest_path = MFU_STRDUP(dest_param.path); /* check that source and destinations are ok */ DCOPY_check_paths(); }
/* Encode an operation as a text message of the form
 *   fsize:chunk_idx:offset:code:len:operand
 * where len is strlen(operand).
 *
 * Returns a buffer of CIRCLE_MAX_STRING_LEN bytes allocated with
 * MFU_MALLOC; the caller is responsible for freeing it.  If the message
 * cannot be formatted or does not fit, an error is logged and
 * DTAR_abort(EXIT_FAILURE) is called. */
static char* DTAR_encode_operation(DTAR_operation_code_t code,
                                   const char* operand,
                                   uint64_t fsize,
                                   uint64_t chunk_idx,
                                   uint64_t offset)
{
    size_t opsize = (size_t) CIRCLE_MAX_STRING_LEN;
    char* op = (char*) MFU_MALLOC(opsize);

    size_t len = strlen(operand);

    int written = snprintf(op, opsize,
        "%" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%d:%d:%s",
        fsize, chunk_idx, offset, (int) code, (int) len, operand);

    /* snprintf returns a negative value on error and the untruncated
     * length otherwise; test both explicitly instead of relying on the
     * implicit int -> size_t conversion */
    if (written < 0 || (size_t) written >= opsize) {
        MFU_LOG(MFU_LOG_ERR, "Exceed libcircle message size");
        DTAR_abort(EXIT_FAILURE);
    }

    return op;
}
/* allocate and initialize a new file list object */ mfu_flist mfu_flist_new() { /* allocate memory for file list, cast it to handle, initialize and return */ flist_t* flist = (flist_t*) MFU_MALLOC(sizeof(flist_t)); flist->detail = 0; flist->total_files = 0; /* initialize linked list */ flist->list_count = 0; flist->list_head = NULL; flist->list_tail = NULL; flist->list_index = NULL; /* initialize user and group structures */ mfu_flist_usrgrp_init(flist); mfu_flist bflist = (mfu_flist) flist; return bflist; }
DTAR_operation_t* DTAR_decode_operation(char *op) { DTAR_operation_t* ret = (DTAR_operation_t*) MFU_MALLOC( sizeof(DTAR_operation_t)); if (sscanf(strtok(op, ":"), "%" SCNu64, &(ret->file_size)) != 1) { MFU_LOG(MFU_LOG_ERR, "Could not decode file size attribute."); DTAR_abort(EXIT_FAILURE); } if (sscanf(strtok(NULL, ":"), "%" SCNu64, &(ret->chunk_index)) != 1) { MFU_LOG(MFU_LOG_ERR, "Could not decode chunk index attribute."); DTAR_abort(EXIT_FAILURE); } if (sscanf(strtok(NULL, ":"), "%" SCNu64, &(ret->offset)) != 1) { MFU_LOG(MFU_LOG_ERR, "Could not decode source base offset attribute."); DTAR_abort(EXIT_FAILURE); } if (sscanf(strtok(NULL, ":"), "%d", (int*) &(ret->code)) != 1) { MFU_LOG(MFU_LOG_ERR, "Could not decode stage code attribute."); DTAR_abort(EXIT_FAILURE); } /* get number of characters in operand string */ int op_len; char* str = strtok(NULL, ":"); if (sscanf(str, "%d", &op_len) != 1) { MFU_LOG(MFU_LOG_ERR, "Could not decode operand string length."); DTAR_abort(EXIT_FAILURE); } /* skip over digits and trailing ':' to get pointer to operand */ char* operand = str + strlen(str) + 1; operand[op_len] = '\0'; ret->operand = operand; return ret; }
/* Walk every path named by the num entries of params and add what is
 * found to flist, by collecting the .path member of each entry into a
 * temporary array and passing that array to mfu_flist_walk_paths. */
void mfu_flist_walk_param_paths(uint64_t num,
                                const mfu_param_path* params,
                                mfu_walk_opts_t* walk_opts,
                                mfu_flist flist)
{
    /* temporary array of borrowed path pointers */
    const char** path_list = (const char**) MFU_MALLOC(num * sizeof(char*));

    /* the strings themselves stay owned by params */
    uint64_t k = 0;
    while (k < num) {
        path_list[k] = params[k].path;
        k++;
    }

    /* do the walk */
    mfu_flist_walk_paths((uint64_t) num, path_list, walk_opts, flist);

    /* only the pointer array is released here */
    mfu_free(&path_list);
}
/* Add an MFU_PRED_TYPE predicate to p for the file type letter t.
 * Recognised letters: b, c, d, f, l, p, s (mapped to the S_IF*
 * constants below).  Returns 1 on success, -1 for any other letter,
 * in which case nothing is added. */
static int add_type(mfu_pred* p, char t)
{
    /* the predicate argument is a heap-allocated mode_t */
    mode_t* type = (mode_t*) MFU_MALLOC(sizeof(mode_t));

    /* translate the letter into a file type constant */
    int known = 1;
    mode_t kind = 0;
    switch (t) {
        case 'b': kind = S_IFBLK;  break;
        case 'c': kind = S_IFCHR;  break;
        case 'd': kind = S_IFDIR;  break;
        case 'f': kind = S_IFREG;  break;
        case 'l': kind = S_IFLNK;  break;
        case 'p': kind = S_IFIFO;  break;
        case 's': kind = S_IFSOCK; break;
        default:  known = 0;       break;
    }

    /* unsupported type character */
    if (!known) {
        mfu_free(&type);
        return -1;
    }

    /* register the check for this type */
    *type = kind;
    mfu_pred_add(p, MFU_PRED_TYPE, (void *)type);
    return 1;
}
int main(int argc, char** argv) { /* initialize MPI */ MPI_Init(&argc, &argv); mfu_init(); /* get our rank and the size of comm_world */ int rank, ranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* pointer to mfu_walk_opts */ mfu_walk_opts_t* walk_opts = mfu_walk_opts_new(); /* parse command line options */ char* inputname = NULL; char* ownername = NULL; char* groupname = NULL; char* modestr = NULL; char* regex_exp = NULL; mfu_perms* head = NULL; int walk = 0; int exclude = 0; int name = 0; /* verbose by default */ mfu_debug_level = MFU_LOG_VERBOSE; int option_index = 0; static struct option long_options[] = { {"input", 1, 0, 'i'}, {"owner", 1, 0, 'u'}, {"group", 1, 0, 'g'}, {"mode", 1, 0, 'm'}, {"exclude", 1, 0, 'e'}, {"match", 1, 0, 'a'}, {"name", 0, 0, 'n'}, {"progress", 1, 0, 'P'}, {"verbose", 0, 0, 'v'}, {"quiet", 0, 0, 'q'}, {"help", 0, 0, 'h'}, {0, 0, 0, 0} }; int usage = 0; while (1) { int c = getopt_long( argc, argv, "i:u:g:m:nvqh", long_options, &option_index ); if (c == -1) { break; } switch (c) { case 'i': inputname = MFU_STRDUP(optarg); break; case 'u': ownername = MFU_STRDUP(optarg); break; case 'g': groupname = MFU_STRDUP(optarg); break; case 'm': modestr = MFU_STRDUP(optarg); break; case 'e': regex_exp = MFU_STRDUP(optarg); exclude = 1; break; case 'a': regex_exp = MFU_STRDUP(optarg); exclude = 0; break; case 'n': name = 1; break; case 'P': mfu_progress_timeout = atoi(optarg); break; case 'v': mfu_debug_level = MFU_LOG_VERBOSE; break; case 'q': mfu_debug_level = MFU_LOG_NONE; break; case 'h': usage = 1; break; case '?': usage = 1; break; default: if (rank == 0) { printf("?? 
getopt returned character code 0%o ??\n", c); } } } /* check that we got a valid progress value */ if (mfu_progress_timeout < 0) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Seconds in --progress must be non-negative: %d invalid", mfu_progress_timeout); } usage = 1; } /* paths to walk come after the options */ int numpaths = 0; mfu_param_path* paths = NULL; if (optind < argc) { /* got a path to walk */ walk = 1; /* determine number of paths specified by user */ numpaths = argc - optind; /* allocate space for each path */ paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path)); /* process each path */ const char** argpaths = (const char**)(&argv[optind]); mfu_param_path_set_all(numpaths, argpaths, paths); /* advance to next set of options */ optind += numpaths; /* don't allow input file and walk */ if (inputname != NULL) { usage = 1; } } else { /* if we're not walking, we must be reading, * and for that we need a file */ if (inputname == NULL) { usage = 1; } } /* check that our mode string parses correctly */ if (modestr != NULL) { int valid = mfu_perms_parse(modestr, &head); if (! 
valid) { usage = 1; if (rank == 0) { printf("invalid mode string: %s\n", modestr); } /* free the head of the list */ mfu_perms_free(&head); } } /* print usage if we need to */ if (usage) { if (rank == 0) { print_usage(); } mfu_finalize(); MPI_Finalize(); return 1; } /* create an empty file list */ mfu_flist flist = mfu_flist_new(); /* flag used to check if permissions need to be * set on the walk */ if (head != NULL) { mfu_perms_need_dir_rx(head, walk_opts); } /* get our list of files, either by walking or reading an * input file */ if (walk) { /* if in octal mode set use_stat=0 to stat each file on walk */ if (head != NULL && head->octal && ownername == NULL && groupname == NULL) { walk_opts->use_stat = 0; } /* walk list of input paths */ mfu_flist_walk_param_paths(numpaths, paths, walk_opts, flist); } else { /* read list from file */ mfu_flist_read_cache(inputname, flist); } /* assume we'll use the full list */ mfu_flist srclist = flist; /* filter the list if needed */ mfu_flist filtered_flist = MFU_FLIST_NULL; if (regex_exp != NULL) { /* filter the list based on regex */ filtered_flist = mfu_flist_filter_regex(flist, regex_exp, exclude, name); /* update our source list to use the filtered list instead of the original */ srclist = filtered_flist; } /* change group and permissions */ mfu_flist_chmod(srclist, ownername, groupname, head); /* free list if it was used */ if (filtered_flist != MFU_FLIST_NULL){ /* free the filtered flist (if any) */ mfu_flist_free(&filtered_flist); } /* free the file list */ mfu_flist_free(&flist); /* free the path parameters */ mfu_param_path_free_all(numpaths, paths); /* free memory allocated to hold params */ mfu_free(&paths); /* free the owner and group names */ mfu_free(&ownername); mfu_free(&groupname); /* free the modestr */ mfu_free(&modestr); /* free the match_pattern if it isn't null */ if (regex_exp != NULL) { mfu_free(®ex_exp); } /* free the head of the list */ mfu_perms_free(&head); /* free the input file name */ 
mfu_free(&inputname); /* free the walk options */ mfu_walk_opts_delete(&walk_opts); /* shut down MPI */ mfu_finalize(); MPI_Finalize(); return 0; }
int main(int argc, char** argv) { int i; /* initialize MPI */ MPI_Init(&argc, &argv); mfu_init(); /* get our rank and the size of comm_world */ int rank, ranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* parse command line options */ char* inputname = NULL; char* regex_exp = NULL; int walk = 0; int exclude = 0; int name = 0; int dryrun = 0; int option_index = 0; static struct option long_options[] = { {"input", 1, 0, 'i'}, {"lite", 0, 0, 'l'}, {"exclude", 1, 0, 'e'}, {"match", 1, 0, 'a'}, {"name", 0, 0, 'n'}, {"help", 0, 0, 'h'}, {"dryrun", 0, 0, 'd'}, {"verbose", 0, 0, 'v'}, {0, 0, 0, 0} }; int usage = 0; while (1) { int c = getopt_long( argc, argv, "i:nlhv", long_options, &option_index ); if (c == -1) { break; } switch (c) { case 'i': inputname = MFU_STRDUP(optarg); break; case 'l': walk_stat = 0; break; case 'e': regex_exp = MFU_STRDUP(optarg); exclude = 1; break; case 'a': regex_exp = MFU_STRDUP(optarg); exclude = 0; break; case 'n': name = 1; break; case 'h': usage = 1; break; case 'd': dryrun = 1; break; case 'v': mfu_debug_level = MFU_LOG_VERBOSE; break; case '?': usage = 1; break; default: if (rank == 0) { printf("?? 
getopt returned character code 0%o ??\n", c); } } } /* paths to walk come after the options */ int numpaths = 0; mfu_param_path* paths = NULL; if (optind < argc) { /* got a path to walk */ walk = 1; /* determine number of paths specified by user */ numpaths = argc - optind; /* allocate space for each path */ paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path)); /* process each path */ char** argpaths = &argv[optind]; mfu_param_path_set_all(numpaths, argpaths, paths); /* advance to next set of options */ optind += numpaths; /* don't allow input file and walk */ if (inputname != NULL) { usage = 1; } } else { /* if we're not walking, we must be reading, * and for that we need a file */ if (inputname == NULL) { usage = 1; } } /* print usage if we need to */ if (usage) { if (rank == 0) { print_usage(); } mfu_finalize(); MPI_Finalize(); return 1; } /* create an empty file list */ mfu_flist flist = mfu_flist_new(); /* get our list of files, either by walking or reading an * input file */ if (walk) { /* walk list of input paths */ mfu_param_path_walk(numpaths, paths, walk_stat, flist, dir_perm); } else { /* read list from file */ mfu_flist_read_cache(inputname, flist); } /* assume we'll use the full list */ mfu_flist srclist = flist; /* filter the list if needed */ mfu_flist filtered_flist = MFU_FLIST_NULL; if (regex_exp != NULL) { /* filter the list based on regex */ filtered_flist = mfu_flist_filter_regex(flist, regex_exp, exclude, name); /* update our source list to use the filtered list instead of the original */ srclist = filtered_flist; } /* only actually delete files if the user wasn't doing a dry run */ if (dryrun) { /* just print what we would delete without actually doing anything, * this is useful if the user is trying to get a regex right */ mfu_flist_print(srclist); } else { /* remove files */ mfu_flist_unlink(srclist); } /* free list if it was used */ if (filtered_flist != MFU_FLIST_NULL){ /* free the filtered flist (if any) */ 
mfu_flist_free(&filtered_flist); } /* free the file list */ mfu_flist_free(&flist); /* free the path parameters */ mfu_param_path_free_all(numpaths, paths); /* free memory allocated to hold params */ mfu_free(&paths); /* free the regex string if we have one */ mfu_free(®ex_exp); /* free the input file name */ mfu_free(&inputname); /* shut down MPI */ mfu_finalize(); MPI_Finalize(); return 0; }
int main (int argc, char** argv) { /* initialize MPI */ MPI_Init(&argc, &argv); mfu_init(); /* get our rank and the size of comm_world */ int rank, ranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* pointer to mfu_walk_opts */ mfu_walk_opts_t* walk_opts = mfu_walk_opts_new(); /* capture current time for any time based queries, * to get a consistent value, capture and bcast from rank 0 */ mfu_pred_times* now_t = mfu_pred_now(); int ch; mfu_pred* pred_head = mfu_pred_new(); char* inputname = NULL; char* outputname = NULL; int walk = 0; int text = 0; static struct option long_options[] = { {"input", 1, 0, 'i'}, {"output", 1, 0, 'o'}, {"verbose", 0, 0, 'v'}, {"quiet", 0, 0, 'q'}, {"help", 0, 0, 'h'}, { "maxdepth", required_argument, NULL, 'd' }, { "amin", required_argument, NULL, 'a' }, { "anewer", required_argument, NULL, 'B' }, { "atime", required_argument, NULL, 'A' }, { "cmin", required_argument, NULL, 'c' }, { "cnewer", required_argument, NULL, 'D' }, { "ctime", required_argument, NULL, 'C' }, { "mmin", required_argument, NULL, 'm' }, { "newer", required_argument, NULL, 'N' }, { "mtime", required_argument, NULL, 'M' }, { "gid", required_argument, NULL, 'g' }, { "group", required_argument, NULL, 'G' }, { "uid", required_argument, NULL, 'u' }, { "user", required_argument, NULL, 'U' }, { "name", required_argument, NULL, 'n' }, { "path", required_argument, NULL, 'P' }, { "regex", required_argument, NULL, 'r' }, { "size", required_argument, NULL, 's' }, { "type", required_argument, NULL, 't' }, { "print", no_argument, NULL, 'p' }, { "exec", required_argument, NULL, 'e' }, { NULL, 0, NULL, 0 }, }; options.maxdepth = INT_MAX; int usage = 0; while (1) { int c = getopt_long( argc, argv, "i:o:vqh", long_options, NULL ); if (c == -1) { break; } int i; int space; char* buf; mfu_pred_times* t; mfu_pred_times_rel* tr; regex_t* r; int ret; /* verbose by default */ mfu_debug_level = MFU_LOG_VERBOSE; switch (c) { case 'e': space = 1024 * 1024; 
buf = (char *)MFU_MALLOC(space); for (i = optind-1; strcmp(";", argv[i]); i++) { if (i > argc) { if (rank == 0) { printf("%s: exec missing terminating ';'\n", argv[0]); } exit(1); } strncat(buf, argv[i], space); space -= strlen(argv[i]) + 1; /* save room for space or null */ if (space <= 0) { if (rank == 0) { printf("%s: exec argument list too long.\n", argv[0]); } mfu_free(&buf); continue; } strcat(buf, " "); optind++; } buf[strlen(buf)] = '\0'; /* clobbers trailing space */ mfu_pred_add(pred_head, MFU_PRED_EXEC, buf); break; case 'd': options.maxdepth = atoi(optarg); break; case 'g': /* TODO: error check argument */ buf = MFU_STRDUP(optarg); mfu_pred_add(pred_head, MFU_PRED_GID, (void *)buf); break; case 'G': buf = MFU_STRDUP(optarg); mfu_pred_add(pred_head, MFU_PRED_GROUP, (void *)buf); break; case 'u': /* TODO: error check argument */ buf = MFU_STRDUP(optarg); mfu_pred_add(pred_head, MFU_PRED_UID, (void *)buf); break; case 'U': buf = MFU_STRDUP(optarg); mfu_pred_add(pred_head, MFU_PRED_USER, (void *)buf); break; case 's': buf = MFU_STRDUP(optarg); mfu_pred_add(pred_head, MFU_PRED_SIZE, (void *)buf); break; case 'n': mfu_pred_add(pred_head, MFU_PRED_NAME, MFU_STRDUP(optarg)); break; case 'P': mfu_pred_add(pred_head, MFU_PRED_PATH, MFU_STRDUP(optarg)); break; case 'r': r = (regex_t*) MFU_MALLOC(sizeof(regex_t)); ret = regcomp(r, optarg, 0); if (ret) { MFU_ABORT(-1, "Could not compile regex: `%s' rc=%d\n", optarg, ret); } mfu_pred_add(pred_head, MFU_PRED_REGEX, (void*)r); break; case 'a': tr = mfu_pred_relative(optarg, now_t); mfu_pred_add(pred_head, MFU_PRED_AMIN, (void *)tr); break; case 'm': tr = mfu_pred_relative(optarg, now_t); mfu_pred_add(pred_head, MFU_PRED_MMIN, (void *)tr); break; case 'c': tr = mfu_pred_relative(optarg, now_t); mfu_pred_add(pred_head, MFU_PRED_CMIN, (void *)tr); break; case 'A': tr = mfu_pred_relative(optarg, now_t); mfu_pred_add(pred_head, MFU_PRED_ATIME, (void *)tr); break; case 'M': tr = mfu_pred_relative(optarg, now_t); 
mfu_pred_add(pred_head, MFU_PRED_MTIME, (void *)tr); break; case 'C': tr = mfu_pred_relative(optarg, now_t); mfu_pred_add(pred_head, MFU_PRED_CTIME, (void *)tr); break; case 'B': t = get_mtimes(optarg); if (t == NULL) { if (rank == 0) { printf("%s: can't find file %s\n", argv[0], optarg); } exit(1); } mfu_pred_add(pred_head, MFU_PRED_ANEWER, (void *)t); break; case 'N': t = get_mtimes(optarg); if (t == NULL) { if (rank == 0) { printf("%s: can't find file %s\n", argv[0], optarg); } exit(1); } mfu_pred_add(pred_head, MFU_PRED_MNEWER, (void *)t); break; case 'D': t = get_mtimes(optarg); if (t == NULL) { if (rank == 0) { printf("%s: can't find file %s\n", argv[0], optarg); } exit(1); } mfu_pred_add(pred_head, MFU_PRED_CNEWER, (void *)t); break; case 'p': mfu_pred_add(pred_head, MFU_PRED_PRINT, NULL); break; case 't': ret = add_type(pred_head, *optarg); if (ret != 1) { if (rank == 0) { printf("%s: unsupported file type %s\n", argv[0], optarg); } exit(1); } break; case 'i': inputname = MFU_STRDUP(optarg); break; case 'o': outputname = MFU_STRDUP(optarg); break; case 'v': mfu_debug_level = MFU_LOG_VERBOSE; break; case 'q': mfu_debug_level = MFU_LOG_NONE; break; case 'h': usage = 1; break; case '?': usage = 1; break; default: if (rank == 0) { printf("?? 
getopt returned character code 0%o ??\n", c); } } } pred_commit(pred_head); /* paths to walk come after the options */ int numpaths = 0; mfu_param_path* paths = NULL; if (optind < argc) { /* got a path to walk */ walk = 1; /* determine number of paths specified by user */ numpaths = argc - optind; /* allocate space for each path */ paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path)); /* process each path */ char** p = &argv[optind]; mfu_param_path_set_all((uint64_t)numpaths, (const char**)p, paths); optind += numpaths; /* don't allow user to specify input file with walk */ if (inputname != NULL) { usage = 1; } } else { /* if we're not walking, we must be reading, * and for that we need a file */ if (inputname == NULL) { usage = 1; } } if (usage) { if (rank == 0) { print_usage(); } mfu_finalize(); MPI_Finalize(); return 0; } /* create an empty file list */ mfu_flist flist = mfu_flist_new(); if (walk) { /* walk list of input paths */ mfu_flist_walk_param_paths(numpaths, paths, walk_opts, flist); } else { /* read data from cache file */ mfu_flist_read_cache(inputname, flist); } /* apply predicates to each item in list */ mfu_flist flist2 = mfu_flist_filter_pred(flist, pred_head); /* write data to cache file */ if (outputname != NULL) { if (!text) { mfu_flist_write_cache(outputname, flist2); } else { mfu_flist_write_text(outputname, flist2); } } /* free off the filtered list */ mfu_flist_free(&flist2); /* free users, groups, and files objects */ mfu_flist_free(&flist); /* free predicate list */ mfu_pred_free(&pred_head); /* free memory allocated for options */ mfu_free(&outputname); mfu_free(&inputname); /* free the path parameters */ mfu_param_path_free_all(numpaths, paths); /* free memory allocated to hold params */ mfu_free(&paths); /* free structure holding current time */ mfu_free(&now_t); /* free the walk options */ mfu_walk_opts_delete(&walk_opts); /* shut down MPI */ mfu_finalize(); MPI_Finalize(); return 0; }
/* Copy all extended attributes from op->operand to dest_path.
 *
 * The body is compiled only when DCOPY_USE_XATTRS is non-zero; otherwise
 * the function does nothing.  Failures are logged (ENOTSUP on listing
 * and ENOATTR on reading are skipped silently) and never abort the
 * copy: an attribute that cannot be read or written is skipped and the
 * remaining attributes are still processed.  statbuf is not used. */
void DCOPY_copy_xattrs(
    DCOPY_operation_t* op,
    const struct stat64* statbuf,
    const char* dest_path)
{
#if DCOPY_USE_XATTRS
    /* get source file name */
    char* src_path = op->operand;

    /* start with a reasonable buffer, we'll allocate more as needed */
    size_t list_bufsize = 1204;
    char* list = (char*) MFU_MALLOC(list_bufsize);

    /* fetch the list of attribute names; when the buffer is too small
     * (ERANGE) ask for the required size with size==0 and retry */
    ssize_t list_size;
    int got_list = 0;
    while (! got_list) {
        list_size = llistxattr(src_path, list, list_bufsize);
        if (list_size < 0) {
            if (errno == ERANGE) {
                /* buffer is too small, free our current buffer
                 * and call it again with size==0 to get new size */
                mfu_free(&list);
                list_bufsize = 0;
            }
            else if (errno == ENOTSUP) {
                /* this is common enough that we silently ignore it */
                break;
            }
            else {
                /* this is a real error */
                MFU_LOG(MFU_LOG_ERR, "Failed to get list of extended attributes on %s llistxattr() errno=%d %s",
                    src_path, errno, strerror(errno)
                   );
                break;
            }
        }
        else {
            if (list_size > 0 && list_bufsize == 0) {
                /* called llistxattr with size==0 and got back positive
                 * number indicating size of buffer we need to allocate */
                list_bufsize = (size_t) list_size;
                list = (char*) MFU_MALLOC(list_bufsize);
            }
            else {
                /* got our list, its size is in list_size, which may be 0 */
                got_list = 1;
            }
        }
    }

    /* the list is a sequence of NUL-terminated names packed back to
     * back; copy each attribute with lgetxattr/lsetxattr */
    if (got_list) {
        char* name = list;
        while (name < list + list_size) {
            /* start with a reasonable buffer,
             * allocate something bigger as needed */
            size_t val_bufsize = 1024;
            void* val = (void*) MFU_MALLOC(val_bufsize);

            /* lookup value for name, using the same size probing
             * scheme as for the name list above */
            ssize_t val_size;
            int got_val = 0;
            while (! got_val) {
                val_size = lgetxattr(src_path, name, val, val_bufsize);
                if (val_size < 0) {
                    if (errno == ERANGE) {
                        /* buffer is too small, free our current buffer
                         * and call it again with size==0 to get new size */
                        mfu_free(&val);
                        val_bufsize = 0;
                    }
                    else if (errno == ENOATTR) {
                        /* source object no longer has this attribute,
                         * maybe deleted out from under us */
                        break;
                    }
                    else {
                        /* this is a real error; name the call that
                         * actually failed */
                        MFU_LOG(MFU_LOG_ERR, "Failed to get value for name=%s on %s lgetxattr() errno=%d %s",
                            name, src_path, errno, strerror(errno)
                           );
                        break;
                    }
                }
                else {
                    if (val_size > 0 && val_bufsize == 0) {
                        /* called lgetxattr with size==0 and got back positive
                         * number indicating size of buffer we need to allocate */
                        val_bufsize = (size_t) val_size;
                        val = (void*) MFU_MALLOC(val_bufsize);
                    }
                    else {
                        /* got our value, its size is in val_size, which may be 0 */
                        got_val = 1;
                    }
                }
            }

            /* set attribute on destination object */
            if (got_val) {
                int setrc = lsetxattr(dest_path, name, val, (size_t) val_size, 0);
                if (setrc != 0) {
                    MFU_LOG(MFU_LOG_ERR, "Failed to set value for name=%s on %s lsetxattr() errno=%d %s",
                        name, dest_path, errno, strerror(errno)
                       );
                }
            }

            /* free value buffer */
            mfu_free(&val);
            val_bufsize = 0;

            /* jump to next name, skipping its terminating NUL */
            size_t namelen = strlen(name) + 1;
            name += namelen;
        }
    }

    /* free space allocated for list */
    mfu_free(&list);
    list_bufsize = 0;

    return;
#endif /* DCOPY_USE_XATTRS */
}
/* Redistribute the items of list among the ranks of MPI_COMM_WORLD.
 *
 * map is called once per local item and returns the rank that should
 * own it; args is passed through to map unchanged.  Items are packed,
 * exchanged with MPI_Alltoall/MPI_Alltoallv, and unpacked into a new
 * list created with mfu_flist_subset, which is summarized and returned. */
mfu_flist mfu_flist_remap(mfu_flist list, mfu_flist_map_fn map, const void* args)
{
    /* the output starts as an empty subset of the input */
    mfu_flist remapped = mfu_flist_subset(list);

    /* number of ranks in the job */
    int nranks;
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    /* per-rank integer arrays used by the exchange */
    size_t array_bytes = (size_t)nranks * sizeof(int);
    int* sendsizes  = (int*) MFU_MALLOC(array_bytes);
    int* senddisps  = (int*) MFU_MALLOC(array_bytes);
    int* sendoffset = (int*) MFU_MALLOC(array_bytes);
    int* recvsizes  = (int*) MFU_MALLOC(array_bytes);
    int* recvdisps  = (int*) MFU_MALLOC(array_bytes);

    /* nothing counted or packed for any rank yet */
    int r;
    for (r = 0; r < nranks; r++) {
        sendsizes[r]  = 0;
        sendoffset[r] = 0;
    }

    /* remember the destination rank chosen for each local item */
    uint64_t nitems = mfu_flist_size(list);
    int* file2rank = (int*) MFU_MALLOC(nitems * sizeof(int));

    /* first pass: pick a destination for every item and add up how
     * many bytes go to each rank and in total */
    size_t sendbytes = 0;
    uint64_t item;
    for (item = 0; item < nitems; item++) {
        int dest = map(list, item, nranks, args);
        file2rank[item] = dest;

        /* TODO: check that pack size doesn't overflow int */
        size_t packed = mfu_flist_file_pack_size(list);
        sendsizes[dest] += (int) packed;
        sendbytes += packed;
    }

    /* each rank's region starts where the previous rank's ends */
    senddisps[0] = 0;
    for (r = 1; r < nranks; r++) {
        senddisps[r] = senddisps[r - 1] + sendsizes[r - 1];
    }

    /* second pass: pack every item into its destination's region */
    char* sendbuf = (char*) MFU_MALLOC(sendbytes);
    for (item = 0; item < nitems; item++) {
        int dest = file2rank[item];

        /* next free byte within this destination's region */
        char* slot = sendbuf + senddisps[dest] + sendoffset[dest];
        size_t packed = mfu_flist_file_pack(slot, list, item);

        /* TODO: check that pack size doesn't overflow int */
        sendoffset[dest] += (int) packed;
    }

    /* tell every rank how many bytes to expect from us */
    MPI_Alltoall(sendsizes, 1, MPI_INT, recvsizes, 1, MPI_INT, MPI_COMM_WORLD);

    /* total incoming bytes */
    size_t recvbytes = 0;
    for (r = 0; r < nranks; r++) {
        recvbytes += (size_t) recvsizes[r];
    }

    /* incoming regions are laid out back to back as well */
    recvdisps[0] = 0;
    for (r = 1; r < nranks; r++) {
        recvdisps[r] = recvdisps[r - 1] + recvsizes[r - 1];
    }

    /* exchange the packed items */
    char* recvbuf = (char*) MFU_MALLOC(recvbytes);
    MPI_Alltoallv(
        sendbuf, sendsizes, senddisps, MPI_CHAR,
        recvbuf, recvsizes, recvdisps, MPI_CHAR, MPI_COMM_WORLD
    );

    /* unpack received items one after another; each unpack call
     * reports how far to advance */
    char* cursor  = recvbuf;
    char* recvend = recvbuf + recvbytes;
    while (cursor < recvend) {
        cursor += mfu_flist_file_unpack(cursor, remapped);
    }

    mfu_flist_summarize(remapped);

    /* release temporary storage */
    mfu_free(&file2rank);
    mfu_free(&recvbuf);
    mfu_free(&recvdisps);
    mfu_free(&recvsizes);
    mfu_free(&sendbuf);
    mfu_free(&sendoffset);
    mfu_free(&senddisps);
    mfu_free(&sendsizes);

    return remapped;
}
/* Evenly spread the items of flist among all processes for improved load
 * balancing, then have each process delete the items it received.
 *
 * Each rank packs its items as "<type char><path>\0" records, where the
 * type character is 'd' for directories, 'f' for files and links, and 'u'
 * for anything else.  Records are sent so that every rank ends up with
 * either floor(N/ranks) or floor(N/ranks)+1 of the N global items.
 *
 * flist   - list of items to delete (local portion on each rank)
 * rmcount - output: number of items this rank passed to remove_type() */
static void remove_spread(mfu_flist flist, uint64_t* rmcount)
{
    uint64_t idx;

    /* initialize our remove count */
    *rmcount = 0;

    /* get number of ranks in job */
    int ranks;
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* allocate memory for alltoall exchanges */
    size_t bufsize = (size_t)ranks * sizeof(int);
    int* sendcounts = (int*) MFU_MALLOC(bufsize);
    int* sendsizes  = (int*) MFU_MALLOC(bufsize);
    int* senddisps  = (int*) MFU_MALLOC(bufsize);
    int* recvsizes  = (int*) MFU_MALLOC(bufsize);
    int* recvdisps  = (int*) MFU_MALLOC(bufsize);

    /* get number of items */
    uint64_t my_count  = mfu_flist_size(flist);
    uint64_t all_count = mfu_flist_global_size(flist);
    uint64_t offset    = mfu_flist_global_offset(flist);

    /* compute number of bytes we'll send:
     * one type character, the path, and a terminating NUL per item */
    size_t sendbytes = 0;
    for (idx = 0; idx < my_count; idx++) {
        const char* name = mfu_flist_file_get_name(flist, idx);
        size_t len = strlen(name) + 2;
        sendbytes += len;
    }

    /* compute the number of items that each rank should have */
    uint64_t low   = all_count / (uint64_t)ranks;
    uint64_t extra = all_count - low * (uint64_t)ranks;

    /* compute number that we'll send to each rank and initialize sendsizes and offsets */
    uint64_t i;
    for (i = 0; i < (uint64_t)ranks; i++) {
        /* compute starting element id and count for given rank,
         * the first 'extra' ranks each take one additional item */
        uint64_t start, num;
        if (i < extra) {
            num = low + 1;
            start = i * num;
        } else {
            num = low;
            start = (i - extra) * num + extra * (low + 1);
        }

        /* compute the number of items we'll send to this task */
        uint64_t sendcnt = 0;
        if (my_count > 0) {
            if (start <= offset && offset < start + num) {
                /* this rank overlaps our range,
                 * and its first element comes at or before our first element */
                sendcnt = num - (offset - start);
                if (my_count < sendcnt) {
                    /* the number the rank could receive from us
                     * is more than we have left */
                    sendcnt = my_count;
                }
            } else if (offset < start && start < offset + my_count) {
                /* this rank overlaps our range,
                 * and our first element comes strictly before its first element */
                sendcnt = my_count - (start - offset);
                if (num < sendcnt) {
                    /* the number the rank can receive from us
                     * is less than we have left */
                    sendcnt = num;
                }
            }
        }

        /* record the number of items we'll send to this task */
        sendcounts[i] = (int) sendcnt;

        /* set sizes and displacements to 0, we'll fix this later */
        sendsizes[i] = 0;
        senddisps[i] = 0;
    }

    /* allocate space */
    char* sendbuf = (char*) MFU_MALLOC(sendbytes);

    /* copy data into buffer */
    int dest = -1;
    int disp = 0;
    for (idx = 0; idx < my_count; idx++) {
        /* get name and type of item */
        const char* name = mfu_flist_file_get_name(flist, idx);
        mfu_filetype type = mfu_flist_file_get_type(flist, idx);

        /* get rank that we're packing data for */
        if (dest == -1) {
            dest = get_first_nonzero(sendcounts, ranks);
            if (dest == -1) {
                /* no rank is left to take this item, which means the send
                 * counts are inconsistent with our local count; stop packing
                 * rather than index senddisps[] with -1 */
                break;
            }

            /* about to copy first item for this rank,
             * record its displacement */
            senddisps[dest] = disp;
        }

        /* identify region to be sent to rank */
        char* path = sendbuf + disp;

        /* first character encodes item type */
        if (type == MFU_TYPE_DIR) {
            path[0] = 'd';
        } else if (type == MFU_TYPE_FILE || type == MFU_TYPE_LINK) {
            path[0] = 'f';
        } else {
            path[0] = 'u';
        }

        /* now copy in the path */
        strcpy(&path[1], name);

        /* TODO: check that we don't overflow the int */
        /* add bytes to sendsizes and increase displacement */
        size_t count = strlen(name) + 2;
        sendsizes[dest] += (int) count;
        disp += (int) count;

        /* decrement the count for this rank */
        sendcounts[dest]--;
        if (sendcounts[dest] == 0) {
            dest = -1;
        }
    }

    /* compute displacements */
    senddisps[0] = 0;
    for (i = 1; i < (uint64_t)ranks; i++) {
        senddisps[i] = senddisps[i - 1] + sendsizes[i - 1];
    }

    /* alltoall to specify incoming counts */
    MPI_Alltoall(sendsizes, 1, MPI_INT, recvsizes, 1, MPI_INT, MPI_COMM_WORLD);

    /* compute size of recvbuf and displacements */
    size_t recvbytes = 0;
    recvdisps[0] = 0;
    for (i = 0; i < (uint64_t)ranks; i++) {
        recvbytes += (size_t) recvsizes[i];
        if (i > 0) {
            recvdisps[i] = recvdisps[i - 1] + recvsizes[i - 1];
        }
    }

    /* allocate recvbuf */
    char* recvbuf = (char*) MFU_MALLOC(recvbytes);

    /* alltoallv to send data */
    MPI_Alltoallv(
        sendbuf, sendsizes, senddisps, MPI_CHAR,
        recvbuf, recvsizes, recvdisps, MPI_CHAR, MPI_COMM_WORLD
    );

    /* delete data */
    char* item = recvbuf;
    while (item < recvbuf + recvbytes) {
        /* get item name and type */
        char type = item[0];
        char* name = &item[1];

        /* delete item */
        remove_type(type, name);

        /* keep tally of number of items we deleted; the parentheses matter:
         * *rmcount++ would advance the pointer instead of the counter */
        (*rmcount)++;

        /* go to next item: type char + path + NUL */
        size_t item_size = strlen(item) + 1;
        item += item_size;
    }

    /* free memory */
    mfu_free(&recvbuf);
    mfu_free(&recvdisps);
    mfu_free(&recvsizes);
    mfu_free(&sendbuf);
    mfu_free(&senddisps);
    mfu_free(&sendsizes);
    mfu_free(&sendcounts);

    return;
}
int main(int argc, char* argv[]) { MPI_Init(&argc, &argv); mfu_init(); /* get our rank and number of ranks in the job */ int rank, ranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* pointer to mfu_walk_opts */ mfu_walk_opts_t* walk_opts = mfu_walk_opts_new(); uint64_t idx; int option_index = 0; int usage = 0; int report = 0; unsigned int numpaths = 0; mfu_param_path* paths = NULL; unsigned long long bytes; /* verbose by default */ mfu_debug_level = MFU_LOG_VERBOSE; /* default to 1MB stripe size, stripe across all OSTs, and all files are candidates */ int stripes = -1; uint64_t stripe_size = 1048576; uint64_t min_size = 0; static struct option long_options[] = { {"count", 1, 0, 'c'}, {"size", 1, 0, 's'}, {"minsize", 1, 0, 'm'}, {"report", 0, 0, 'r'}, {"progress", 1, 0, 'P'}, {"verbose", 0, 0, 'v'}, {"quiet", 0, 0, 'q'}, {"help", 0, 0, 'h'}, {0, 0, 0, 0} }; while (1) { int c = getopt_long(argc, argv, "c:s:m:rvqh", long_options, &option_index); if (c == -1) { break; } switch (c) { case 'c': /* stripe count */ stripes = atoi(optarg); break; case 's': /* stripe size in bytes */ if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Failed to parse stripe size: %s", optarg); } MPI_Abort(MPI_COMM_WORLD, 1); } stripe_size = (uint64_t)bytes; break; case 'm': /* min file size in bytes */ if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Failed to parse minimum file size: %s", optarg); } MPI_Abort(MPI_COMM_WORLD, 1); } min_size = (uint64_t)bytes; break; case 'r': /* report striping info */ report = 1; break; case 'P': mfu_progress_timeout = atoi(optarg); break; case 'v': mfu_debug_level = MFU_LOG_VERBOSE; break; case 'q': mfu_debug_level = MFU_LOG_NONE; break; case 'h': /* display usage */ usage = 1; break; case '?': /* display usage */ usage = 1; break; default: if (rank == 0) { printf("?? 
getopt returned character code 0%o ??\n", c); } } } /* check that we got a valid progress value */ if (mfu_progress_timeout < 0) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Seconds in --progress must be non-negative: %d invalid", mfu_progress_timeout); } usage = 1; } /* paths to walk come after the options */ if (optind < argc) { /* determine number of paths specified by user */ numpaths = argc - optind; /* allocate space for each path */ paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path)); /* process each path */ char** p = &argv[optind]; mfu_param_path_set_all((uint64_t)numpaths, (const char**)p, paths); optind += numpaths; } else { usage = 1; } /* if we need to print usage, print it and exit */ if (usage) { if (rank == 0) { print_usage(); } mfu_finalize(); MPI_Finalize(); return 1; } /* nothing to do if lustre support is disabled */ #ifndef LUSTRE_SUPPORT if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Lustre support is disabled."); } MPI_Abort(MPI_COMM_WORLD, 1); #endif /* stripe count must be -1 for all available or greater than 0 */ if (stripes < -1) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Stripe count must be -1 for all servers, 0 for lustre file system default, or a positive value"); } MPI_Abort(MPI_COMM_WORLD, 1); } /* lustre requires stripe sizes to be aligned */ if (stripe_size > 0 && stripe_size % 65536 != 0) { if (rank == 0) { MFU_LOG(MFU_LOG_ERR, "Stripe size must be a multiple of 65536"); } MPI_Abort(MPI_COMM_WORLD, 1); } /* TODO: verify that source / target are on Lustre */ /* walk list of input paths and stat as we walk */ mfu_flist flist = mfu_flist_new(); mfu_flist_walk_param_paths(numpaths, paths, walk_opts, flist); /* filter down our list to files which don't meet our striping requirements */ mfu_flist filtered = filter_list(flist, stripes, stripe_size, min_size, &create_prog_count_total, &stripe_prog_bytes_total); mfu_flist_free(&flist); MPI_Barrier(MPI_COMM_WORLD); /* report the file size and stripe count of all files we 
found */ if (report) { /* report the files in our filtered list */ stripe_info_report(filtered); /* free the paths and our list */ mfu_flist_free(&filtered); mfu_param_path_free_all(numpaths, paths); mfu_free(&paths); /* finalize */ mfu_finalize(); MPI_Finalize(); return 0; } /* generate a global suffix for our temp files and have each node check it's list */ char suffix[8]; uint64_t retry; /* seed our random number generator */ srand(time(NULL)); /* keep trying to make a valid random suffix...*/ do { uint64_t attempt = 0; /* make rank 0 responsible for generating a random suffix */ if (rank == 0) { generate_suffix(suffix, sizeof(suffix)); } /* broadcast the random suffix to all ranks */ MPI_Bcast(suffix, sizeof(suffix), MPI_CHAR, 0, MPI_COMM_WORLD); /* check that the file doesn't already exist */ uint64_t size = mfu_flist_size(filtered); for (idx = 0; idx < size; idx++) { char temp_path[PATH_MAX]; strcpy(temp_path, mfu_flist_file_get_name(filtered, idx)); strcat(temp_path, suffix); if(!mfu_access(temp_path, F_OK)) { /* the file already exists */ attempt = 1; break; } } /* do a reduce to figure out if a rank has a file collision */ MPI_Allreduce(&attempt, &retry, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD); } while(retry != 0); /* initialize progress messages while creating files */ create_prog_count = 0; create_prog = mfu_progress_start(mfu_progress_timeout, 1, MPI_COMM_WORLD, create_progress_fn); /* create new files so we can restripe */ uint64_t size = mfu_flist_size(filtered); for (idx = 0; idx < size; idx++) { char temp_path[PATH_MAX]; strcpy(temp_path, mfu_flist_file_get_name(filtered, idx)); strcat(temp_path, suffix); /* create a striped file at the temp file path */ mfu_stripe_set(temp_path, stripe_size, stripes); /* update our status for file create progress */ create_prog_count++; mfu_progress_update(&create_prog_count, create_prog); } /* finalize file create progress messages */ mfu_progress_complete(&create_prog_count, &create_prog); 
MPI_Barrier(MPI_COMM_WORLD); /* initialize progress messages while copying data */ stripe_prog_bytes = 0; stripe_prog = mfu_progress_start(mfu_progress_timeout, 1, MPI_COMM_WORLD, stripe_progress_fn); /* found a suffix, now we need to break our files into chunks based on stripe size */ mfu_file_chunk* file_chunks = mfu_file_chunk_list_alloc(filtered, stripe_size); mfu_file_chunk* p = file_chunks; while (p != NULL) { /* build path to temp file */ char temp_path[PATH_MAX]; strcpy(temp_path, p->name); strcat(temp_path, suffix); /* write each chunk in our list */ write_file_chunk(p, temp_path); /* move on to next file chunk */ p = p->next; } mfu_file_chunk_list_free(&file_chunks); /* finalize progress messages */ mfu_progress_complete(&stripe_prog_bytes, &stripe_prog); MPI_Barrier(MPI_COMM_WORLD); /* remove input file and rename temp file */ for (idx = 0; idx < size; idx++) { /* build path to temp file */ const char *in_path = mfu_flist_file_get_name(filtered, idx); char out_path[PATH_MAX]; strcpy(out_path, in_path); strcat(out_path, suffix); /* change the mode of the newly restriped file to be the same as the old one */ mode_t mode = (mode_t) mfu_flist_file_get_mode(filtered, idx); if (mfu_chmod(out_path, mode) != 0) { MFU_LOG(MFU_LOG_ERR, "Failed to chmod file %s (%s)", out_path, strerror(errno)); MPI_Abort(MPI_COMM_WORLD, 1); } /* rename the new, restriped file to the old name */ if (rename(out_path, in_path) != 0) { MFU_LOG(MFU_LOG_ERR, "Failed to rename file %s to %s", out_path, in_path); MPI_Abort(MPI_COMM_WORLD, 1); } } /* wait for everyone to finish */ MPI_Barrier(MPI_COMM_WORLD); /* free the walk options */ mfu_walk_opts_delete(&walk_opts); /* free filtered list, path parameters */ mfu_flist_free(&filtered); mfu_param_path_free_all(numpaths, paths); mfu_free(&paths); mfu_finalize(); MPI_Finalize(); return 0; }
/* Copy one chunk-list entry from its source file into out_path.
 *
 * The entry p describes a region of the file p->name that starts at byte
 * p->offset and spans p->length bytes; p->file_size is the total size of
 * that file.  Data is copied in pieces of at most 1 MiB, each written at
 * the same byte offset in the output file that it was read from.  Copying
 * stops early once the end of the input file is reached.
 *
 * Any I/O failure is logged and aborts the job, including a failure to
 * flush or close the output file, since the caller later renames the
 * output file over the original. */
static void write_file_chunk(mfu_file_chunk* p, const char* out_path)
{
    size_t chunk_size = 1024*1024;
    uint64_t base = (uint64_t) p->offset;
    uint64_t file_size = (uint64_t) p->file_size;
    const char* in_path = p->name;
    uint64_t stripe_size = (uint64_t) p->length;

    /* if the file size is 0, there's no data to restripe */
    /* if the stripe size is 0, then there's no work to be done */
    if (file_size == 0 || stripe_size == 0) {
        return;
    }

    /* allocate buffer */
    void* buf = MFU_MALLOC(chunk_size);

    /* open input file for reading */
    int in_fd = mfu_open(in_path, O_RDONLY);
    if (in_fd < 0) {
        MFU_LOG(MFU_LOG_ERR, "Failed to open input file %s (%s)", in_path, strerror(errno));
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* open output file for writing */
    int out_fd = mfu_open(out_path, O_WRONLY);
    if (out_fd < 0) {
        MFU_LOG(MFU_LOG_ERR, "Failed to open output file %s (%s)", out_path, strerror(errno));
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* write data */
    uint64_t chunk_id = 0;
    uint64_t stripe_read = 0;
    while (stripe_read < stripe_size) {
        /* determine number of bytes to read */
        /* try to read a full chunk's worth of bytes */
        size_t read_size = chunk_size;

        /* if the stripe doesn't have that much left */
        uint64_t remainder = stripe_size - stripe_read;
        if (remainder < (uint64_t) read_size) {
            read_size = (size_t) remainder;
        }

        /* get byte offset to read from */
        uint64_t offset = base + (chunk_id * chunk_size);
        if (offset < file_size) {
            /* the first byte falls within the file size,
             * now check the last byte */
            uint64_t last = offset + (uint64_t) read_size;
            if (last > file_size) {
                /* the last byte is beyond the end, set read size
                 * to the most we can read */
                read_size = (size_t) (file_size - offset);
            }
        } else {
            /* the first byte we need to read is past the end of
             * the file, so don't read anything */
            read_size = 0;
        }

        /* bail if we don't have anything to read */
        if (read_size == 0) {
            break;
        }

        /* seek to correct spot in input file */
        off_t pos = (off_t) offset;
        off_t seek_rc = mfu_lseek(in_path, in_fd, pos, SEEK_SET);
        if (seek_rc == (off_t)-1) {
            MFU_LOG(MFU_LOG_ERR, "Failed to seek in input file %s (%s)", in_path, strerror(errno));
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* read chunk from input */
        ssize_t nread = mfu_read(in_path, in_fd, buf, read_size);

        /* check for errors */
        if (nread < 0) {
            MFU_LOG(MFU_LOG_ERR, "Failed to read data from input file %s (%s)", in_path, strerror(errno));
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* check for short reads */
        if ((size_t) nread != read_size) {
            MFU_LOG(MFU_LOG_ERR, "Got a short read from input file %s", in_path);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* seek to correct spot in output file */
        seek_rc = mfu_lseek(out_path, out_fd, pos, SEEK_SET);
        if (seek_rc == (off_t)-1) {
            MFU_LOG(MFU_LOG_ERR, "Failed to seek in output file %s (%s)", out_path, strerror(errno));
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* write chunk to output */
        ssize_t nwrite = mfu_write(out_path, out_fd, buf, read_size);

        /* check for errors */
        if (nwrite < 0) {
            MFU_LOG(MFU_LOG_ERR, "Failed to write data to output file %s (%s)", out_path, strerror(errno));
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* check for short writes */
        if ((size_t) nwrite != read_size) {
            MFU_LOG(MFU_LOG_ERR, "Got a short write to output file %s", out_path);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* update our byte count for progress messages */
        stripe_prog_bytes += read_size;
        mfu_progress_update(&stripe_prog_bytes, stripe_prog);

        /* go on to the next chunk in this stripe, we assume we
         * read the whole chunk size, if we didn't it's because
         * the stripe is smaller or we're at the end of the file,
         * but in either case we're done so it doesn't hurt to
         * over estimate in this calculation */
        stripe_read += (uint64_t) chunk_size;
        chunk_id++;
    }

    /* flush and close the output file; a failure here means the data may
     * not have reached storage, so treat it like any other write error */
    if (mfu_fsync(out_path, out_fd) != 0) {
        MFU_LOG(MFU_LOG_ERR, "Failed to fsync output file %s (%s)", out_path, strerror(errno));
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    if (mfu_close(out_path, out_fd) != 0) {
        MFU_LOG(MFU_LOG_ERR, "Failed to close output file %s (%s)", out_path, strerror(errno));
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* the input file was only read, so a close failure loses nothing */
    mfu_close(in_path, in_fd);

    /* free buffer */
    mfu_free(&buf);
}
/* Fill in params[0..num-1] from the path strings paths[0..num-1].
 *
 * The lookups are divided among the ranks of MPI_COMM_WORLD: each rank
 * processes a contiguous slice of the paths (copying the original string,
 * computing a reduced absolute path, resolving it with realpath, and
 * calling lstat on both), packs its results, and an allgatherv delivers
 * every rank's results to all ranks, where they are unpacked into params.
 *
 * Rank 0 logs a warning for each path whose lstat did not succeed.
 *
 * num    - number of entries in paths and params
 * paths  - input path strings; a NULL entry leaves its param initialized
 *          but otherwise unset
 * params - caller-allocated array of num entries to be filled in */
void mfu_param_path_set_all(uint64_t num, const char** paths, mfu_param_path* params)
{
    /* get our rank and number of ranks */
    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* determine number we should look up */
    uint64_t count = num / (uint64_t) ranks;
    uint64_t extra = num - count * (uint64_t) ranks;
    if (rank < (int) extra) {
        /* procs whose rank is less than extra each
         * handle one extra param than those whose
         * rank is equal or greater than extra */
        count++;
    }

    /* determine our starting point */
    uint64_t start = 0;
    if (rank < (int) extra) {
        /* for these procs, count is one more than procs with ranks >= extra */
        start = (uint64_t)rank * count;
    } else {
        /* for these procs, count is one less than procs with ranks < extra */
        start = extra * (count + 1) + ((uint64_t)rank - extra) * count;
    }

    /* allocate temporary params for the slice this rank looks up */
    mfu_param_path* p = MFU_MALLOC(count * sizeof(mfu_param_path));

    /* total number of bytes needed to pack our params */
    uint64_t bytes = 0;

    /* process each path we're responsible for */
    uint64_t i;
    for (i = 0; i < count; i++) {
        /* get pointer to param structure */
        mfu_param_path* param = &p[i];

        /* initialize all fields */
        mfu_param_path_init(param);

        /* lookup the path */
        uint64_t path_idx = start + i;
        const char* path = paths[path_idx];

        /* set param fields for path */
        if (path != NULL) {
            /* make a copy of original path */
            param->orig = MFU_STRDUP(path);

            /* get absolute path and remove ".", "..", consecutive "/",
             * and trailing "/" characters */
            param->path = mfu_path_strdup_abs_reduce_str(path);

            /* get stat info for simplified path */
            if (mfu_lstat(param->path, &param->path_stat) == 0) {
                param->path_stat_valid = 1;
            }

            /* TODO: we use realpath below, which is nice since it takes out
             * ".", "..", symlinks, and adds the absolute path, however, it
             * fails if the file/directory does not already exist, which is
             * often the case for dest path. */

            /* resolve any symlinks */
            char target[PATH_MAX];
            if (realpath(path, target) != NULL) {
                /* make a copy of resolved name */
                param->target = MFU_STRDUP(target);

                /* get stat info for resolved path */
                if (mfu_lstat(param->target, &param->target_stat) == 0) {
                    param->target_stat_valid = 1;
                }
            }

            /* add in bytes needed to pack this param */
            bytes += (uint64_t) mfu_pack_param_size(param);
        }
    }

    /* TODO: eventually it would be nice to leave this data distributed,
     * however for now some tools expect all params to be defined */

    /* allgather to get bytes on each process */
    int* recvcounts = (int*) MFU_MALLOC((size_t) ranks * sizeof(int));
    int* recvdispls = (int*) MFU_MALLOC((size_t) ranks * sizeof(int));
    int sendcount = (int) bytes;
    MPI_Allgather(&sendcount, 1, MPI_INT, recvcounts, 1, MPI_INT, MPI_COMM_WORLD);

    /* compute displacements and total number of bytes that we'll receive */
    uint64_t allbytes = 0;
    int disp = 0;
    for (i = 0; i < (uint64_t) ranks; i++) {
        recvdispls[i] = disp;
        disp += recvcounts[i];
        allbytes += (uint64_t) recvcounts[i];
    }

    /* allocate memory for send and recv buffers */
    char* sendbuf = MFU_MALLOC(bytes);
    char* recvbuf = MFU_MALLOC(allbytes);

    /* pack send buffer; ptr is passed by address so that mfu_pack_param
     * can move it forward (presumably past the bytes it wrote) */
    char* ptr = sendbuf;
    for (i = 0; i < count; i++) {
        mfu_pack_param(&ptr, &p[i]);
    }

    /* allgatherv to collect data */
    MPI_Allgatherv(sendbuf, sendcount, MPI_BYTE, recvbuf, recvcounts, recvdispls, MPI_BYTE, MPI_COMM_WORLD);

    /* unpack recv buffer into caller's params */
    ptr = recvbuf;
    for (i = 0; i < num; i++) {
        mfu_unpack_param((const char**)(&ptr), &params[i]);
    }

    /* Loop through the list of files &/or directories, and check the params
     * struct to see if all of them are valid file names. If one is not, let
     * the user know by printing a warning */
    if (rank == 0) {
        for (i = 0; i < num; i++) {
            /* get pointer to param structure */
            mfu_param_path* param = &params[i];
            if (param->path_stat_valid == 0) {
                /* failed to find a file at this location, let user know (may be a typo) */
                MFU_LOG(MFU_LOG_WARN, "Warning: `%s' does not exist", param->orig);
            }
        }
    }

    /* free message buffers */
    mfu_free(&recvbuf);
    mfu_free(&sendbuf);

    /* free arrays for recv counts and displacements */
    mfu_free(&recvdispls);
    mfu_free(&recvcounts);

    /* free temporary params */
    mfu_param_path_free_list(count, p);
    mfu_free(&p);

    return;
}
int main(int argc, char** argv) { int i; /* initialize MPI */ MPI_Init(&argc, &argv); mfu_init(); /* get our rank and the size of comm_world */ int rank, ranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* pointer to mfu_walk_opts */ mfu_walk_opts_t* walk_opts = mfu_walk_opts_new(); /* TODO: extend options * - allow user to cache scan result in file * - allow user to load cached scan as input * * - allow user to filter by user, group, or filename using keyword or regex * - allow user to specify time window * - allow user to specify file sizes * * - allow user to sort by different fields * - allow user to group output (sum all bytes, group by user) */ char* inputname = NULL; char* outputname = NULL; char* sortfields = NULL; char* distribution = NULL; int file_histogram = 0; int walk = 0; int print = 0; int text = 0; struct distribute_option option; /* verbose by default */ mfu_debug_level = MFU_LOG_VERBOSE; int option_index = 0; static struct option long_options[] = { {"input", 1, 0, 'i'}, {"output", 1, 0, 'o'}, {"text", 0, 0, 't'}, {"lite", 0, 0, 'l'}, {"sort", 1, 0, 's'}, {"distribution", 1, 0, 'd'}, {"file_histogram", 0, 0, 'f'}, {"print", 0, 0, 'p'}, {"verbose", 0, 0, 'v'}, {"quiet", 0, 0, 'q'}, {"help", 0, 0, 'h'}, {0, 0, 0, 0} }; int usage = 0; while (1) { int c = getopt_long( argc, argv, "i:o:tls:d:fpvqh", long_options, &option_index ); if (c == -1) { break; } switch (c) { case 'i': inputname = MFU_STRDUP(optarg); break; case 'o': outputname = MFU_STRDUP(optarg); break; case 'l': /* don't stat each file on the walk */ walk_opts->use_stat = 0; break; case 's': sortfields = MFU_STRDUP(optarg); break; case 'd': distribution = MFU_STRDUP(optarg); break; case 'f': file_histogram = 1; break; case 'p': print = 1; break; case 'v': mfu_debug_level = MFU_LOG_VERBOSE; break; case 'q': mfu_debug_level = 0; break; case 't': text = 1; break; case 'h': usage = 1; break; case '?': usage = 1; break; default: if (rank == 0) { printf("?? 
getopt returned character code 0%o ??\n", c); } } } /* paths to walk come after the options */ int numpaths = 0; mfu_param_path* paths = NULL; if (optind < argc) { /* got a path to walk */ walk = 1; /* determine number of paths specified by user */ numpaths = argc - optind; /* allocate space for each path */ paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path)); /* process each path */ char** p = &argv[optind]; mfu_param_path_set_all((uint64_t)numpaths, (const char**)p, paths); optind += numpaths; /* don't allow user to specify input file with walk */ if (inputname != NULL) { usage = 1; } } else { /* if we're not walking, we must be reading, * and for that we need a file */ if (inputname == NULL) { usage = 1; } } /* if user is trying to sort, verify the sort fields are valid */ if (sortfields != NULL) { int maxfields; int nfields = 0; char* sortfields_copy = MFU_STRDUP(sortfields); if (walk_opts->use_stat) { maxfields = 7; char* token = strtok(sortfields_copy, ","); while (token != NULL) { if (strcmp(token, "name") != 0 && strcmp(token, "-name") != 0 && strcmp(token, "user") != 0 && strcmp(token, "-user") != 0 && strcmp(token, "group") != 0 && strcmp(token, "-group") != 0 && strcmp(token, "uid") != 0 && strcmp(token, "-uid") != 0 && strcmp(token, "gid") != 0 && strcmp(token, "-gid") != 0 && strcmp(token, "atime") != 0 && strcmp(token, "-atime") != 0 && strcmp(token, "mtime") != 0 && strcmp(token, "-mtime") != 0 && strcmp(token, "ctime") != 0 && strcmp(token, "-ctime") != 0 && strcmp(token, "size") != 0 && strcmp(token, "-size") != 0) { /* invalid token */ if (rank == 0) { printf("Invalid sort field: %s\n", token); } usage = 1; } nfields++; token = strtok(NULL, ","); } } else { maxfields = 1; char* token = strtok(sortfields_copy, ","); while (token != NULL) { if (strcmp(token, "name") != 0 && strcmp(token, "-name") != 0) { /* invalid token */ if (rank == 0) { printf("Invalid sort field: %s\n", token); } usage = 1; } nfields++; token = 
strtok(NULL, ","); } } if (nfields > maxfields) { if (rank == 0) { printf("Exceeded maximum number of sort fields: %d\n", maxfields); } usage = 1; } mfu_free(&sortfields_copy); } if (distribution != NULL) { if (distribution_parse(&option, distribution) != 0) { if (rank == 0) { printf("Invalid distribution argument: %s\n", distribution); } usage = 1; } else if (rank == 0 && option.separator_number != 0) { printf("Separators: "); for (i = 0; i < option.separator_number; i++) { if (i != 0) { printf(", "); } printf("%"PRIu64, option.separators[i]); } printf("\n"); } } if (usage) { if (rank == 0) { print_usage(); } MPI_Finalize(); return 0; } /* TODO: check stat fields fit within MPI types */ // if (sizeof(st_uid) > uint64_t) error(); etc... /* create an empty file list with default values */ mfu_flist flist = mfu_flist_new(); if (walk) { /* walk list of input paths */ mfu_flist_walk_param_paths(numpaths, paths, walk_opts, flist); } else { /* read data from cache file */ mfu_flist_read_cache(inputname, flist); } /* TODO: filter files */ //filter_files(&flist); /* sort files */ if (sortfields != NULL) { /* TODO: don't sort unless all_count > 0 */ mfu_flist_sort(sortfields, &flist); } /* print details for individual files */ if (print) { mfu_flist_print(flist); } /* print summary statistics of flist */ mfu_flist_print_summary(flist); /* print distribution if user specified this option */ if (distribution != NULL || file_histogram) { print_flist_distribution(file_histogram, &option, &flist, rank); } /* write data to cache file */ if (outputname != NULL) { if (!text) { mfu_flist_write_cache(outputname, flist); } else { mfu_flist_write_text(outputname, flist); } } /* free users, groups, and files objects */ mfu_flist_free(&flist); /* free memory allocated for options */ mfu_free(&distribution); mfu_free(&sortfields); mfu_free(&outputname); mfu_free(&inputname); /* free the path parameters */ mfu_param_path_free_all(numpaths, paths); /* free memory allocated to hold params */ 
mfu_free(&paths); /* free the walk options */ mfu_walk_opts_delete(&walk_opts); /* shut down MPI */ mfu_finalize(); MPI_Finalize(); return 0; }
/* Encode every local item of a list and deliver each encoding to the rank
 * selected by a map function.
 *
 * For each item, map() picks the destination rank and encode() produces
 * the bytes to send.  encode() is called twice per item: once with a NULL
 * buffer to obtain the encoded size, and once with the destination address
 * to write the bytes.  map() is called once per item and its result is
 * cached, so the sizing pass and the packing pass always agree on the
 * destination even if map() is not a pure function.
 *
 * list   - input list
 * buffer - output: newly allocated buffer holding all bytes received by
 *          this rank; the caller is responsible for releasing it
 * encode - item encoder
 * map    - item-to-rank map function
 * args   - opaque pointer forwarded to both encode and map
 *
 * Returns the number of bytes stored in *buffer. */
size_t mfu_flist_distribute_map(mfu_flist list, char** buffer,
                                mfu_flist_name_encode_fn encode,
                                mfu_flist_map_fn map, void* args)
{
    uint64_t idx;

    /* get number of ranks in job */
    int ranks;
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* allocate arrays for alltoall */
    size_t bufsize = (size_t)ranks * sizeof(int);
    int* sendsizes  = (int*) MFU_MALLOC(bufsize);
    int* senddisps  = (int*) MFU_MALLOC(bufsize);
    int* sendoffset = (int*) MFU_MALLOC(bufsize);
    int* recvsizes  = (int*) MFU_MALLOC(bufsize);
    int* recvdisps  = (int*) MFU_MALLOC(bufsize);

    /* initialize sendsizes and offsets */
    int i;
    for (i = 0; i < ranks; i++) {
        sendsizes[i]  = 0;
        sendoffset[i] = 0;
    }

    /* get number of elements in our local list */
    uint64_t size = mfu_flist_size(list);

    /* allocate space to record item-to-rank mapping */
    int* item2rank = (int*) MFU_MALLOC(size * sizeof(int));

    /* compute number of bytes we'll send to each rank */
    size_t sendbytes = 0;
    for (idx = 0; idx < size; idx++) {
        /* determine which rank this item goes to and remember it
         * for the packing pass below */
        int dest = map(list, idx, ranks, args);
        item2rank[idx] = dest;

        /* TODO: check that pack size doesn't overflow int */
        /* total number of bytes we'll send to each rank and the total overall */
        size_t count = encode(NULL, list, idx, args);
        sendsizes[dest] += (int) count;
        sendbytes += count;
    }

    /* compute displacements */
    senddisps[0] = 0;
    for (i = 1; i < ranks; i++) {
        senddisps[i] = senddisps[i - 1] + sendsizes[i - 1];
    }

    /* allocate space */
    char* sendbuf = (char*) MFU_MALLOC(sendbytes);

    /* copy data into buffer */
    for (idx = 0; idx < size; idx++) {
        /* rank we mapped this item to during the sizing pass */
        int dest = item2rank[idx];

        /* identify region to be sent to rank */
        char* path = sendbuf + senddisps[dest] + sendoffset[dest];
        size_t count = encode(path, list, idx, args);

        /* TODO: check that pack size doesn't overflow int */
        /* bump up the offset for this rank */
        sendoffset[dest] += (int) count;
    }

    /* alltoall to specify incoming counts */
    MPI_Alltoall(sendsizes, 1, MPI_INT, recvsizes, 1, MPI_INT, MPI_COMM_WORLD);

    /* compute size of recvbuf and displacements */
    size_t recvbytes = 0;
    recvdisps[0] = 0;
    for (i = 0; i < ranks; i++) {
        recvbytes += (size_t) recvsizes[i];
        if (i > 0) {
            recvdisps[i] = recvdisps[i - 1] + recvsizes[i - 1];
        }
    }

    /* allocate recvbuf */
    char* recvbuf = (char*) MFU_MALLOC(recvbytes);

    /* alltoallv to send data */
    MPI_Alltoallv(
        sendbuf, sendsizes, senddisps, MPI_CHAR,
        recvbuf, recvsizes, recvdisps, MPI_CHAR, MPI_COMM_WORLD
    );

    /* free memory */
    mfu_free(&item2rank);
    mfu_free(&recvdisps);
    mfu_free(&recvsizes);
    mfu_free(&sendbuf);
    mfu_free(&sendoffset);
    mfu_free(&senddisps);
    mfu_free(&sendsizes);

    /* hand the receive buffer over to the caller */
    *buffer = recvbuf;
    return recvbytes;
}
/* Create an archive file from the items in flist, using libcircle to
 * distribute the copying of file data.
 *
 * Each rank computes the size its items occupy in the archive, an MPI_Scan
 * turns those into global offsets, headers are written for files,
 * directories and links, and a libcircle job copies the file contents.
 * Rank 0 logs start/end times, total size and aggregate rate at the end.
 *
 * flist       - items to archive (local portion on each rank)
 * archivefile - path of the archive file to create
 * opts        - user options, copied into DTAR_user_opts */
static void mfu_flist_archive_create_libcircle(mfu_flist flist, const char* archivefile, mfu_archive_options_t* opts)
{
    DTAR_flist = flist;
    DTAR_user_opts = *opts;

    MPI_Comm_rank(MPI_COMM_WORLD, &DTAR_rank);

    /* TODO: stripe the archive file if on parallel file system */

    /* init statistics */
    DTAR_statistics.total_dirs  = 0;
    DTAR_statistics.total_files = 0;
    DTAR_statistics.total_links = 0;
    DTAR_statistics.total_size  = 0;
    DTAR_statistics.total_bytes_copied = 0;

    time(&(DTAR_statistics.time_started));
    DTAR_statistics.wtime_started = MPI_Wtime();

    /* create the archive file */
    DTAR_writer.name = archivefile;
    DTAR_writer.flags = O_WRONLY | O_CREAT | O_CLOEXEC | O_LARGEFILE;
    DTAR_writer.fd_tar = open(archivefile, DTAR_writer.flags, 0664);
    if (DTAR_writer.fd_tar < 0) {
        /* without a valid descriptor nothing below can succeed */
        MFU_LOG(MFU_LOG_ERR, "Failed to open archive file for writing: %s", archivefile);
        DTAR_abort(EXIT_FAILURE);
    }

    /* get number of items in our portion of the list */
    DTAR_count = mfu_flist_size(DTAR_flist);

    /* allocate memory for file sizes and offsets */
    uint64_t* fsizes = (uint64_t*) MFU_MALLOC(DTAR_count * sizeof(uint64_t));
    DTAR_offsets     = (uint64_t*) MFU_MALLOC(DTAR_count * sizeof(uint64_t));

    /* compute local offsets for each item and total
     * bytes we're contributing to the archive */
    uint64_t idx;
    uint64_t offset = 0;
    for (idx = 0; idx < DTAR_count; idx++) {
        /* assume the item takes no space */
        fsizes[idx] = 0;

        /* identify item type to compute its size in the archive */
        mfu_filetype type = mfu_flist_file_get_type(DTAR_flist, idx);
        if (type == MFU_TYPE_DIR || type == MFU_TYPE_LINK) {
            /* directories and symlinks only need the header */
            fsizes[idx] = DTAR_HDR_LENGTH;
        } else if (type == MFU_TYPE_FILE) {
            /* regular file requires a header, plus file content,
             * and things are packed into blocks of 512 bytes */
            uint64_t fsize = mfu_flist_file_get_size(DTAR_flist, idx);

            /* determine whether file size is integer multiple of 512 bytes */
            uint64_t rem = fsize % 512;
            if (rem == 0) {
                /* file content is multiple of 512 bytes, so perfect fit */
                fsizes[idx] = fsize + DTAR_HDR_LENGTH;
            } else {
                /* TODO: check and explain this math */
                /* NOTE(review): this rounds the content up to the next 512-byte
                 * block and adds three more blocks; that matches the branch
                 * above only if DTAR_HDR_LENGTH is 1536 -- confirm */
                fsizes[idx] = (fsize / 512 + 4) * 512;
            }
        }

        /* increment our local offset for this item */
        DTAR_offsets[idx] = offset;
        offset += fsizes[idx];
    }

    /* execute scan to figure our global base offset in the archive file;
     * the scan is inclusive, so subtract our own contribution */
    uint64_t global_offset = 0;
    MPI_Scan(&offset, &global_offset, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
    global_offset -= offset;

    /* update offsets for each of our file to their global offset */
    for (idx = 0; idx < DTAR_count; idx++) {
        DTAR_offsets[idx] += global_offset;
    }

    /* create an archive */
    struct archive* ar = archive_write_new();

    archive_write_set_format_pax(ar);

    int r = archive_write_open_fd(ar, DTAR_writer.fd_tar);
    if (r != ARCHIVE_OK) {
        MFU_LOG(MFU_LOG_ERR, "archive_write_open_fd(): %s", archive_error_string(ar));
        DTAR_abort(EXIT_FAILURE);
    }

    /* write headers for our files */
    for (idx = 0; idx < DTAR_count; idx++) {
        mfu_filetype type = mfu_flist_file_get_type(DTAR_flist, idx);
        if (type == MFU_TYPE_FILE || type == MFU_TYPE_DIR || type == MFU_TYPE_LINK) {
            DTAR_write_header(ar, idx, DTAR_offsets[idx]);
        }
    }

    /* prepare libcircle */
    CIRCLE_init(0, NULL, CIRCLE_SPLIT_EQUAL | CIRCLE_CREATE_GLOBAL);
    CIRCLE_loglevel loglevel = CIRCLE_LOG_WARN;
    CIRCLE_enable_logging(loglevel);

    /* register callbacks */
    CIRCLE_cb_create(&DTAR_enqueue_copy);
    CIRCLE_cb_process(&DTAR_perform_copy);

    /* run the libcircle job to copy data into archive file */
    CIRCLE_begin();
    CIRCLE_finalize();

    /* compute total bytes copied */
    uint64_t archive_size = 0;
    MPI_Allreduce(&offset, &archive_size, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
    DTAR_statistics.total_size = archive_size;

    DTAR_statistics.wtime_ended = MPI_Wtime();
    time(&(DTAR_statistics.time_ended));

    /* print stats */
    double rel_time = DTAR_statistics.wtime_ended -
                      DTAR_statistics.wtime_started;
    if (DTAR_rank == 0) {
        char starttime_str[256];
        struct tm* localstart = localtime(&(DTAR_statistics.time_started));
        strftime(starttime_str, 256, "%b-%d-%Y, %H:%M:%S", localstart);

        char endtime_str[256];
        struct tm* localend = localtime(&(DTAR_statistics.time_ended));
        strftime(endtime_str, 256, "%b-%d-%Y, %H:%M:%S", localend);

        /* add two 512 blocks at the end */
        DTAR_statistics.total_size += 512 * 2;

        /* convert bandwidth to unit */
        double agg_rate_tmp;
        double agg_rate = (double) DTAR_statistics.total_size / rel_time;
        const char* agg_rate_units;
        mfu_format_bytes(agg_rate, &agg_rate_tmp, &agg_rate_units);

        MFU_LOG(MFU_LOG_INFO, "Started: %s", starttime_str);
        MFU_LOG(MFU_LOG_INFO, "Completed: %s", endtime_str);
        MFU_LOG(MFU_LOG_INFO, "Total archive size: %" PRIu64, DTAR_statistics.total_size);
        MFU_LOG(MFU_LOG_INFO, "Rate: %.3lf %s "
                "(%.3" PRIu64 " bytes in %.3lf seconds)",
                agg_rate_tmp, agg_rate_units, DTAR_statistics.total_size, rel_time);
    }

    /* clean up */
    mfu_free(&fsizes);
    mfu_free(&DTAR_offsets);

    /* close archive file */
    archive_write_free(ar);
    mfu_close(DTAR_writer.name, DTAR_writer.fd_tar);
}
/* Print a histogram of file sizes for the items in *pflist.
 *
 * file_histogram - when non-zero, bin boundaries are produced by
 *                  create_default_separators(); otherwise the boundaries
 *                  already stored in option (option->separators with
 *                  option->separator_number entries) are used
 * option         - holds the bin boundaries
 * pflist         - pointer to the file list to examine
 * rank           - rank of the calling process; only rank 0 prints
 *
 * Every process counts its local items per bin, the per-bin counts are
 * summed across MPI_COMM_WORLD, and rank 0 prints one line per bin.
 * Always returns 0. */
static int print_flist_distribution(int file_histogram,
                                    struct distribute_option *option,
                                    mfu_flist* pflist, int rank)
{
    /* file list to use */
    mfu_flist flist = *pflist;

    /* number of items in the local portion of the list */
    uint64_t size = mfu_flist_size(flist);

    /* only filled in when default separators are created */
    uint64_t global_max_file_size = 0;

    int separators = 0;
    if (file_histogram) {
        /* create default separators */
        create_default_separators(option, &flist, &size, &separators,
                                  &global_max_file_size);
    } else {
        separators = option->separator_number;
    }

    /* there is one more bin than there are separators, because the
     * last bin holds everything larger than the last separator */
    int bins = separators + 1;

    /* allocate a count for each bin and initialize the counts to 0 */
    uint64_t* dist = (uint64_t*) MFU_MALLOC((size_t)bins * sizeof(uint64_t));
    for (int i = 0; i < bins; i++) {
        dist[i] = 0;
    }

    /* for each file, identify appropriate bin and increment its count;
     * the index is 64-bit so that it matches the type of the item count */
    for (uint64_t idx = 0; idx < size; idx++) {
        /* get the size of the file */
        uint64_t file_size = mfu_flist_file_get_size(flist, idx);

        /* assume the file belongs to the last bin, and override that
         * with the first separator that is at least as large as the file */
        int bin = separators;
        for (int j = 0; j < separators; j++) {
            if (file_size <= option->separators[j]) {
                bin = j;
                break;
            }
        }
        dist[bin]++;
    }

    /* get the total sum across all of the bins */
    uint64_t* disttotal = (uint64_t*) MFU_MALLOC((size_t)bins * sizeof(uint64_t));
    MPI_Allreduce(dist, disttotal, bins, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);

    /* print the file distribution */
    if (rank == 0) {
        double size_tmp;
        const char* size_units;

        printf("%-27s %s\n", "Range", "Number");
        for (int i = 0; i < bins; i++) {
            /* lower bound of the bin: 0 for the first bin,
             * the previous separator otherwise */
            printf("%s", "[ ");
            if (i == 0) {
                printf("%7.3lf %2s", 0.000, "B");
            } else {
                mfu_format_bytes((uint64_t)option->separators[i - 1], &size_tmp, &size_units);
                printf("%7.3lf %2s", size_tmp, size_units);
            }

            printf("%s", " - ");

            /* upper bound of the bin followed by the number of files in it */
            uint64_t number = disttotal[i];
            if (!file_histogram && i == separators) {
                /* user-provided separators have no upper bound for the last bin */
                printf("%10s ) %" PRIu64 "\n", "MAX", number);
            } else {
                /* NOTE(review): in histogram mode this reads
                 * option->separators[separators] for the last bin, so
                 * create_default_separators() presumably provides
                 * separators + 1 entries -- confirm */
                mfu_format_bytes((uint64_t)option->separators[i], &size_tmp, &size_units);
                printf("%7.3lf %2s ) %" PRIu64 "\n", size_tmp, size_units, number);
            }
        }
    }

    /* free the memory used to hold bin counts */
    mfu_free(&disttotal);
    mfu_free(&dist);

    return 0;
}
/* Sort the items of *pflist across all processes by the fields named in
 * sortfields and replace *pflist with the sorted list.
 *
 * sortfields - comma-separated list of sort fields; "name" sorts by
 *              file name ascending and "-name" sorts descending.
 *              Unknown fields are reported by rank 0 and skipped.
 *              At most MAXFIELDS fields are honored; additional valid
 *              fields are reported by rank 0 and ignored.
 * pflist     - in: list to sort; out: newly created sorted list
 *              (the input list is freed)
 *
 * Aborts through MFU_ABORT if a DTCMP type/op cannot be created or the
 * sort fails. Returns MFU_SUCCESS otherwise. */
static int sort_files_readdir(const char* sortfields, mfu_flist* pflist)
{
    /* maximum number of sort fields; an enum constant rather than a
     * const int so the arrays below are ordinary fixed-size arrays */
    enum { MAXFIELDS = 1 };

    /* get list from caller */
    mfu_flist flist = *pflist;

    /* create a new list as subset of original list */
    mfu_flist flist2 = mfu_flist_subset(flist);

    uint64_t incount = mfu_flist_size(flist);
    uint64_t chars   = mfu_flist_file_max_name(flist);

    /* create datatype for packed file list element */
    MPI_Datatype dt_sat;
    size_t bytes = mfu_flist_file_pack_size(flist);
    MPI_Type_contiguous((int)bytes, MPI_BYTE, &dt_sat);

    /* get our rank, only rank 0 reports problems with the sort fields */
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* build type for file path */
    MPI_Datatype dt_filepath;
    MPI_Type_contiguous((int)chars, MPI_CHAR, &dt_filepath);
    MPI_Type_commit(&dt_filepath);

    /* build comparison op for filenames */
    DTCMP_Op op_filepath;
    if (DTCMP_Op_create(dt_filepath, my_strcmp, &op_filepath) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create sorting operation for filepath");
    }

    /* build reverse comparison op for filenames */
    DTCMP_Op op_filepath_rev;
    if (DTCMP_Op_create(dt_filepath, my_strcmp_rev, &op_filepath_rev) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create reverse sorting operation for filepath");
    }

    /* per-field description of the sort key */
    MPI_Datatype types[MAXFIELDS];
    DTCMP_Op     ops[MAXFIELDS];
    sort_field   fields[MAXFIELDS];
    size_t       lengths[MAXFIELDS];

    int nfields;
    for (nfields = 0; nfields < MAXFIELDS; nfields++) {
        types[nfields] = MPI_DATATYPE_NULL;
        ops[nfields]   = DTCMP_OP_NULL;
    }
    nfields = 0;

    /* strtok modifies its input, so parse a private copy */
    char* sortfields_copy = MFU_STRDUP(sortfields);
    char* token = strtok(sortfields_copy, ",");
    while (token != NULL) {
        /* map the token to a comparison op, every field known
         * so far is a file name and differs only in direction */
        DTCMP_Op op = DTCMP_OP_NULL;
        int valid = 1;
        if (strcmp(token, "name") == 0) {
            op = op_filepath;
        } else if (strcmp(token, "-name") == 0) {
            op = op_filepath_rev;
        } else {
            /* invalid token */
            valid = 0;
            if (rank == 0) {
                MFU_LOG(MFU_LOG_ERR, "Invalid sort field: %s\n", token);
            }
        }

        if (valid) {
            /* check the limit BEFORE storing, so that we never
             * write past the end of the field arrays */
            if (nfields >= MAXFIELDS) {
                if (rank == 0) {
                    MFU_LOG(MFU_LOG_ERR, "Too many sort fields, ignoring: %s", token);
                }
                break;
            }

            types[nfields]   = dt_filepath;
            ops[nfields]     = op;
            fields[nfields]  = FILENAME;
            lengths[nfields] = chars;
            nfields++;
        }

        token = strtok(NULL, ",");
    }
    mfu_free(&sortfields_copy);

    /* build key type */
    MPI_Datatype dt_key;
    if (DTCMP_Type_create_series(nfields, types, &dt_key) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create type for key");
    }

    /* create sort op */
    DTCMP_Op op_key;
    if (DTCMP_Op_create_series(nfields, ops, &op_key) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create sorting operation for key");
    }

    /* build keysat type (key followed by packed file element) */
    MPI_Datatype dt_keysat, keysat_types[2];
    keysat_types[0] = dt_key;
    keysat_types[1] = dt_sat;
    if (DTCMP_Type_create_series(2, keysat_types, &dt_keysat) != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to create type for keysat");
    }

    /* get extent of key type */
    MPI_Aint key_lb, key_extent;
    MPI_Type_get_extent(dt_key, &key_lb, &key_extent);

    /* get extent of keysat type */
    MPI_Aint keysat_lb, keysat_extent;
    MPI_Type_get_extent(dt_keysat, &keysat_lb, &keysat_extent);

    /* compute size of sort element and allocate buffer */
    size_t sortbufsize = (size_t)keysat_extent * incount;
    void* sortbuf = MFU_MALLOC(sortbufsize);

    /* copy data into sort elements */
    char* sortptr = (char*) sortbuf;
    for (uint64_t idx = 0; idx < incount; idx++) {
        /* copy in each key field */
        for (int i = 0; i < nfields; i++) {
            if (fields[i] == FILENAME) {
                const char* name = mfu_flist_file_get_name(flist, idx);
                strcpy(sortptr, name);
            }
            sortptr += lengths[i];
        }

        /* pack file element right behind its key */
        sortptr += mfu_flist_file_pack(sortptr, flist, idx);
    }

    /* sort data */
    void* outsortbuf;
    int outsortcount;
    DTCMP_Handle handle;
    int sort_rc = DTCMP_Sortz(
        sortbuf, (int)incount, &outsortbuf, &outsortcount,
        dt_key, dt_keysat, op_key, DTCMP_FLAG_NONE, MPI_COMM_WORLD, &handle
    );
    if (sort_rc != DTCMP_SUCCESS) {
        MFU_ABORT(1, "Failed to sort data");
    }

    /* step through sorted data, skip each key and
     * unpack the file element into the new list */
    sortptr = (char*) outsortbuf;
    for (uint64_t idx = 0; idx < (uint64_t)outsortcount; idx++) {
        sortptr += key_extent;
        sortptr += mfu_flist_file_unpack(sortptr, flist2);
    }

    /* build summary of new list */
    mfu_flist_summarize(flist2);

    /* free memory */
    DTCMP_Free(&handle);

    /* free ops */
    DTCMP_Op_free(&op_key);
    DTCMP_Op_free(&op_filepath_rev);
    DTCMP_Op_free(&op_filepath);

    /* free types */
    MPI_Type_free(&dt_keysat);
    MPI_Type_free(&dt_key);
    MPI_Type_free(&dt_filepath);

    /* free input buffer holding sort elements */
    mfu_free(&sortbuf);

    /* free the satellite type */
    MPI_Type_free(&dt_sat);

    /* return new list and free old one */
    *pflist = flist2;
    mfu_flist_free(&flist);

    return MFU_SUCCESS;
}
/**
 * Encode an operation code for use on the distributed queue structure.
 *
 * The operation is rendered as a colon-separated, NUL-terminated string:
 *
 *   file_size:chunk:source_base_offset:code:strlen(operand):operand
 *
 * and, when dest_base_appendix is not NULL, it is followed by
 *
 *   :strlen(dest_base_appendix):dest_base_appendix
 *
 * The string is written into a newly allocated buffer of
 * CIRCLE_MAX_STRING_LEN bytes, which is returned to the caller.
 * If the encoded text does not fit, an error is logged and
 * DCOPY_abort(EXIT_FAILURE) is called.
 *
 * FIXME: lifting the length limit requires architecture changes in
 * libcircle -- a redesign of internal queue data structures to allow
 * void* types as queue elements instead of null terminated strings.
 * Skipping the length checks below will likely cause silent data
 * corruption.
 */
char* DCOPY_encode_operation(DCOPY_operation_code_t code,
                             int64_t chunk,
                             char* operand,
                             uint16_t source_base_offset,
                             char* dest_base_appendix,
                             int64_t file_size)
{
    /* total space available for the encoded operation */
    const size_t capacity = CIRCLE_MAX_STRING_LEN;

    /* allocate memory to encode op */
    char* op = (char*) MFU_MALLOC(capacity);

    /* encode the fixed fields followed by the operand */
    int nchars = snprintf(op, capacity,
                          "%" PRId64 ":%" PRId64 ":%" PRIu16 ":%d:%d:%s",
                          file_size, chunk, source_base_offset, code,
                          (int)strlen(operand), operand);

    /* snprintf reports the length excluding the terminating NUL,
     * so a value equal to the capacity already means truncation */
    if ((size_t)nchars >= capacity) {
        MFU_LOG(MFU_LOG_ERR,
                "Exceeded libcircle message size due to large file path. "
                "This is a known bug in dcp that we intend to fix. Sorry!");
        DCOPY_abort(EXIT_FAILURE);
    }

    /* number of bytes encoded so far, the terminating NUL is
     * not counted since the next field overwrites it */
    size_t used = (size_t) nchars;

    /* tack on destination base appendix if we have one */
    if (dest_base_appendix != NULL) {
        size_t room = capacity - used;
        nchars = snprintf(op + used, room, ":%d:%s",
                          (int)strlen(dest_base_appendix), dest_base_appendix);

        /* same truncation rule as above */
        if ((size_t)nchars >= room) {
            MFU_LOG(MFU_LOG_ERR,
                    "Exceeded libcircle message size due to large file path. "
                    "This is a known bug in dcp that we intend to fix. Sorry!");
            DCOPY_abort(EXIT_FAILURE);
        }
    }

    return op;
}
/* for each depth, sort files by filename and then remove, to test
 * whether it matters to limit the number of directories each process
 * has to reference (e.g., locking)
 *
 * list    - list of items to delete
 * rmcount - out: number of items this process passed to remove_type(),
 *           set to 0 if the global list is empty
 *
 * Each item is encoded as a fixed-size record consisting of the file
 * name (padded to the maximum name length in the list) followed by a
 * single character for the item type: 'd' for a directory, 'f' for a
 * regular file or link, 'u' for anything else. The records are sorted
 * by name across MPI_COMM_WORLD and each process then deletes the
 * records it receives. */
static void remove_sort(mfu_flist list, uint64_t* rmcount)
{
    /* bail out if total count is 0,
     * in which case we have not deleted anything */
    uint64_t all_count = mfu_flist_global_size(list);
    if (all_count == 0) {
        *rmcount = 0;
        return;
    }

    /* get maximum file name and number of items */
    int chars = (int) mfu_flist_file_max_name(list);
    uint64_t my_count = mfu_flist_size(list);

    /* create key datatype (filename) and comparison op */
    MPI_Datatype dt_key;
    DTCMP_Op op_str;
    DTCMP_Str_create_ascend(chars, &dt_key, &op_str);

    /* create keysat datatype (filename + type) */
    MPI_Datatype types[2], dt_keysat;
    types[0] = dt_key;
    types[1] = MPI_CHAR;
    DTCMP_Type_create_series(2, types, &dt_keysat);

    /* allocate send buffer, the size is computed in size_t so the
     * product cannot overflow an int, and it is based on the number
     * of items the loop below writes */
    int sendcount = (int) my_count;
    size_t recordsize = (size_t)chars + 1;
    size_t sendbufsize = (size_t)my_count * recordsize;
    char* sendbuf = (char*) MFU_MALLOC(sendbufsize);

    /* copy data into buffer */
    char* ptr = sendbuf;
    uint64_t idx;
    for (idx = 0; idx < my_count; idx++) {
        /* encode the filename first */
        const char* name = mfu_flist_file_get_name(list, idx);
        strcpy(ptr, name);
        ptr += chars;

        /* last character encodes item type */
        mfu_filetype type = mfu_flist_file_get_type(list, idx);
        if (type == MFU_TYPE_DIR) {
            ptr[0] = 'd';
        } else if (type == MFU_TYPE_FILE || type == MFU_TYPE_LINK) {
            ptr[0] = 'f';
        } else {
            ptr[0] = 'u';
        }
        ptr++;
    }

    /* sort items */
    void* recvbuf;
    int recvcount;
    DTCMP_Handle handle;
    DTCMP_Sortz(
        sendbuf, sendcount, &recvbuf, &recvcount,
        dt_key, dt_keysat, op_str, DTCMP_FLAG_NONE, MPI_COMM_WORLD, &handle
    );

    /* delete data */
    int delcount = 0;
    ptr = (char*)recvbuf;
    while (delcount < recvcount) {
        /* get item name */
        char* name = ptr;
        ptr += chars;

        /* get item type */
        char type = ptr[0];
        ptr++;

        /* delete item */
        remove_type(type, name);
        delcount++;
    }

    /* record number of items we deleted */
    *rmcount = (uint64_t) delcount;

    /* free output data */
    DTCMP_Free(&handle);

    /* free our send buffer */
    mfu_free(&sendbuf);

    /* free key comparison operation */
    DTCMP_Op_free(&op_str);

    /* free datatypes */
    MPI_Type_free(&dt_keysat);
    MPI_Type_free(&dt_key);

    return;
}
/* print the leading and trailing entries of a distributed file list
 *
 * Every process packs those of its items whose global index falls
 * within the first or the last `range` entries of the global list.
 * The packed items are gathered on rank 0, which unpacks them into a
 * temporary list and prints them, with a "<snip>" marker after the
 * leading entries when the global list holds more than 2 * range items. */
void mfu_flist_print(mfu_flist flist)
{
    /* how many items to show from each end of the list */
    const uint64_t range = 10;

    /* both buffers have room for the leading and the trailing window */
    size_t pack_size = mfu_flist_file_pack_size(flist);
    size_t bufsize = 2 * range * pack_size;
    void* sendbuf = MFU_MALLOC(bufsize);
    void* recvbuf = MFU_MALLOC(bufsize);

    /* get our rank and the size of comm_world */
    int rank, ranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ranks);

    /* local item count, global item count,
     * and global index of our first item */
    uint64_t count  = mfu_flist_size(flist);
    uint64_t total  = mfu_flist_global_size(flist);
    uint64_t offset = mfu_flist_global_offset(flist);

    /* count the local items that fall into either window */
    int num = 0;
    for (uint64_t i = 0; i < count; i++) {
        uint64_t global = offset + i;
        int in_head = (global < range);
        int in_tail = ((total - global) <= range);
        if (in_head || in_tail) {
            num++;
        }
    }

    /* allocate arrays to store counts and displacements */
    int* counts = (int*) MFU_MALLOC((size_t)ranks * sizeof(int));
    int* disps  = (int*) MFU_MALLOC((size_t)ranks * sizeof(int));

    /* let rank 0 know how many bytes each process contributes */
    int bytes = num * (int)pack_size;
    MPI_Gather(&bytes, 1, MPI_INT, counts, 1, MPI_INT, 0, MPI_COMM_WORLD);

    /* pack the selected items into sendbuf */
    char* ptr = (char*) sendbuf;
    for (uint64_t i = 0; i < count; i++) {
        uint64_t global = offset + i;
        int in_head = (global < range);
        int in_tail = ((total - global) <= range);
        if (in_head || in_tail) {
            ptr += mfu_flist_file_pack(ptr, flist, i);
        }
    }

    /* on rank 0, each displacement is the number of
     * bytes contributed by all lower ranks */
    int recvbytes = 0;
    if (rank == 0) {
        for (int r = 0; r < ranks; r++) {
            disps[r] = recvbytes;
            recvbytes += counts[r];
        }
    }

    /* gather data to rank 0 */
    MPI_Gatherv(sendbuf, bytes, MPI_BYTE, recvbuf, counts, disps, MPI_BYTE, 0, MPI_COMM_WORLD);

    /* create temporary list to unpack items into */
    mfu_flist tmplist = mfu_flist_subset(flist);

    /* unpack the gathered items, one packed element at a time */
    if (rank == 0) {
        char* end = (char*) recvbuf + recvbytes;
        for (ptr = (char*) recvbuf; ptr < end; ptr += pack_size) {
            mfu_flist_file_unpack(ptr, tmplist);
        }
    }

    /* summarize list */
    mfu_flist_summarize(tmplist);

    /* print files */
    if (rank == 0) {
        printf("\n");

        uint64_t tmpsize = mfu_flist_size(tmplist);
        for (uint64_t i = 0; i < tmpsize; i++) {
            print_file(tmplist, i);

            /* once the leading window is printed, mark
             * the gap if some items had to be left out */
            if (i + 1 == range && total > 2 * range) {
                printf("\n<snip>\n\n");
            }
        }

        printf("\n");
    }

    /* free our temporary list */
    mfu_flist_free(&tmplist);

    /* free memory */
    mfu_free(&disps);
    mfu_free(&counts);
    mfu_free(&sendbuf);
    mfu_free(&recvbuf);

    return;
}