/* Close the file held in the cache, if there is one, and delete its
 * cached name string.
 *
 * Returns 0 when no file is cached, otherwise the value returned by
 * mfu_close for the cached descriptor. */
int DCOPY_close_file(DCOPY_file_cache_t* cache)
{
    /* nothing to close when no name is cached */
    if (cache->name == NULL) {
        return 0;
    }

    /* TODO: if open for write, fsync? */
    int status = mfu_close(cache->name, cache->fd);

    /* delete the name string now that the descriptor is closed */
    mfu_free(&cache->name);

    return status;
}
/* Return a file descriptor for `file`, going through the one-entry
 * descriptor cache.
 *
 * If the cache already holds `file` in the requested mode, its descriptor
 * is returned as is. If it holds anything else, that file is closed first.
 * The new file is opened read-only when `read_flag` is non-zero, otherwise
 * write-only with O_CREAT and DCOPY_DEF_PERMS_FILE; O_DIRECT is added when
 * DCOPY_user_opts.synchronous is set.
 *
 * Returns the value from mfu_open; the cache is only updated when that
 * value is not -1. */
int DCOPY_open_file(const char* file, int read_flag, DCOPY_file_cache_t* cache)
{
    /* deal with whatever is currently cached */
    if (cache->name != NULL) {
        int same_name = (strcmp(cache->name, file) == 0);
        int same_mode = (cache->read == read_flag);
        if (same_name && same_mode) {
            /* cache hit: name and read/write mode both match */
            return cache->fd;
        }

        /* cache miss: close the old file and delete its name */
        mfu_close(cache->name, cache->fd);
        mfu_free(&cache->name);
    }

    /* pick the open flags for the requested mode */
    int flags = read_flag ? O_RDONLY : (O_WRONLY | O_CREAT);
    if (DCOPY_user_opts.synchronous) {
        flags |= O_DIRECT;
    }

    /* open the new file, permissions are only passed when creating */
    int opened;
    if (read_flag) {
        opened = mfu_open(file, flags);
    } else {
        opened = mfu_open(file, flags, DCOPY_DEF_PERMS_FILE);
    }

    /* leave the cache empty if the open did not succeed */
    if (opened == -1) {
        return opened;
    }

    /* remember the descriptor for subsequent calls */
    cache->name = MFU_STRDUP(file);
    cache->fd   = opened;
    cache->read = read_flag;

    return opened;
}
/* Close the file held in the cache, if there is one, and delete its
 * cached name string. A file that was opened for writing is fsync'ed
 * before it is closed.
 *
 * Returns 0 when no file is cached or when everything succeeded.
 * Otherwise returns the first non-zero result, so that an fsync failure
 * is not hidden by a subsequent successful close. */
int DCOPY_close_file(DCOPY_file_cache_t* cache)
{
    int rc = 0;

    /* close file if we have one */
    char* name = cache->name;
    if (name != NULL) {
        int fd = cache->fd;

        /* if open for write, fsync */
        int read_flag = cache->read;
        if (! read_flag) {
            rc = mfu_fsync(name, fd);
        }

        /* always close the file, but only report the close result
         * if the fsync above did not already fail */
        int close_rc = mfu_close(name, fd);
        if (rc == 0) {
            rc = close_rc;
        }

        /* delete the name string */
        mfu_free(&cache->name);
    }

    return rc;
}
/* Read all entries of directory `dir` with the getdents system call.
 *
 * For each entry other than ".", "..", and entries with d_ino == 0, a
 * record is inserted into CURRENT_LIST with only the file-type bits set
 * in its mode, reduce_items is incremented, and directories are enqueued
 * on `handle` so they get processed in turn.
 *
 * Errors (failure to open, failure of getdents, path too long) are logged;
 * the function returns nothing. */
static void walk_getdents_process_dir(const char* dir, CIRCLE_handle* handle)
{
    char buf[BUF_SIZE];

    /* TODO: may need to try these functions multiple times */
    int fd = mfu_open(dir, O_RDONLY | O_DIRECTORY);
    if (fd == -1) {
        /* print error */
        MFU_LOG(MFU_LOG_ERR, "Failed to open directory for reading: `%s' (errno=%d %s)",
                dir, errno, strerror(errno));
        return;
    }

    /* Read all directory entries */
    while (1) {
        /* execute system call to get block of directory entries,
         * syscall() returns a long so keep the result in that type */
        long nread = syscall(SYS_getdents, fd, buf, (int) BUF_SIZE);
        if (nread == -1) {
            MFU_LOG(MFU_LOG_ERR, "syscall to getdents failed when reading `%s' (errno=%d %s)",
                    dir, errno, strerror(errno));
            break;
        }

        /* bail out if we're done */
        if (nread == 0) {
            break;
        }

        /* otherwise, we read some bytes, so process each record */
        long bpos = 0;
        while (bpos < nread) {
            /* get pointer to current record */
            struct linux_dirent* d = (struct linux_dirent*)(buf + bpos);

            /* get name of directory item, skip d_ino == 0, ".", and ".." entries */
            char* name = d->d_name;
            if (d->d_ino != 0 &&
                strncmp(name, ".",  2) != 0 &&
                strncmp(name, "..", 3) != 0)
            {
                /* check whether we can define path to item:
                 * <dir> + '/' + <name> + '\0' */
                char newpath[CIRCLE_MAX_STRING_LEN];
                size_t len = strlen(dir) + 1 + strlen(name) + 1;
                if (len < sizeof(newpath)) {
                    /* build full path to item, the length check above
                     * guarantees that it fits */
                    strcpy(newpath, dir);
                    strcat(newpath, "/");
                    strcat(newpath, name);

                    /* get type of item, it is read from the last byte
                     * of the record */
                    char d_type = *(buf + bpos + d->d_reclen - 1);

                    /* TODO: this is hacky, would be better to create list elem directly */

                    /* determine type of item (just need to set bits in mode
                     * that get_mfu_filetype checks for), any other type
                     * leaves mode at 0 */
                    mode_t mode = 0;
                    if (d_type == DT_REG) {
                        mode |= S_IFREG;
                    } else if (d_type == DT_DIR) {
                        mode |= S_IFDIR;
                    } else if (d_type == DT_LNK) {
                        mode |= S_IFLNK;
                    }

                    /* insert a record for this item into our list */
                    mfu_flist_insert_stat(CURRENT_LIST, newpath, mode, NULL);

                    /* increment our item count */
                    reduce_items++;

                    /* recurse on directory if we have one */
                    if (d_type == DT_DIR) {
                        handle->enqueue(newpath);
                    }
                } else {
                    /* len and sizeof are both size_t, so print with %zu */
                    MFU_LOG(MFU_LOG_ERR, "Path name is too long: %zu chars exceeds limit %zu",
                            len, sizeof(newpath));
                }
            }

            /* advance to next record */
            bpos += d->d_reclen;
        }
    }

    mfu_close(dir, fd);

    return;
}
/* Copy one stripe of a file from its source path to `out_path`.
 *
 * The chunk descriptor `p` supplies the source file name, the byte offset
 * of the stripe, the stripe length, and the total file size. Data is
 * copied in pieces of at most 1 MiB, each written to the same offset in
 * the output file that it was read from in the input file. Reads are
 * clipped at the file size. The output file must already exist, it is
 * opened with O_WRONLY only.
 *
 * Nothing is done when the file size or the stripe length is 0.
 * Any I/O failure, including a failed fsync or close of the output file,
 * is logged and ends the job through MPI_Abort. */
static void write_file_chunk(mfu_file_chunk* p, const char* out_path)
{
    size_t chunk_size = 1024*1024;
    uint64_t base = (off_t)p->offset;
    uint64_t file_size = (off_t)p->file_size;
    const char *in_path = p->name;
    uint64_t stripe_size = (off_t)p->length;

    /* if the file size is 0, there's no data to restripe */
    /* if the stripe size is 0, then there's no work to be done */
    if (file_size == 0 || stripe_size == 0) {
        return;
    }

    /* allocate buffer */
    void* buf = MFU_MALLOC(chunk_size);

    /* open input file for reading */
    int in_fd = mfu_open(in_path, O_RDONLY);
    if (in_fd < 0) {
        MFU_LOG(MFU_LOG_ERR, "Failed to open input file %s (%s)", in_path, strerror(errno));
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* open output file for writing */
    int out_fd = mfu_open(out_path, O_WRONLY);
    if (out_fd < 0) {
        MFU_LOG(MFU_LOG_ERR, "Failed to open output file %s (%s)", out_path, strerror(errno));
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* write data */
    uint64_t chunk_id = 0;
    uint64_t stripe_read = 0;
    while (stripe_read < stripe_size) {
        /* determine number of bytes to read */
        /* try to read a full chunk's worth of bytes */
        size_t read_size = chunk_size;

        /* if the stripe doesn't have that much left */
        uint64_t remainder = stripe_size - stripe_read;
        if (remainder < (uint64_t) read_size) {
            read_size = (size_t) remainder;
        }

        /* get byte offset to read from */
        uint64_t offset = base + (chunk_id * chunk_size);
        if (offset < file_size) {
            /* the first byte falls within the file size,
             * now check the last byte */
            uint64_t last = offset + (uint64_t) read_size;
            if (last > file_size) {
                /* the last byte is beyond the end, set read size
                 * to the most we can read */
                read_size = (size_t) (file_size - offset);
            }
        } else {
            /* the first byte we need to read is past the end of
             * the file, so don't read anything */
            read_size = 0;
        }

        /* bail if we don't have anything to read */
        if (read_size == 0) {
            break;
        }

        /* seek to correct spot in input file */
        off_t pos = (off_t) offset;
        off_t seek_rc = mfu_lseek(in_path, in_fd, pos, SEEK_SET);
        if (seek_rc == (off_t)-1) {
            MFU_LOG(MFU_LOG_ERR, "Failed to seek in input file %s (%s)", in_path, strerror(errno));
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* read chunk from input */
        ssize_t nread = mfu_read(in_path, in_fd, buf, read_size);

        /* check for errors */
        if (nread < 0) {
            MFU_LOG(MFU_LOG_ERR, "Failed to read data from input file %s (%s)", in_path, strerror(errno));
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* check for short reads, nread is known to be non-negative here
         * so the conversion to size_t is safe */
        if ((size_t) nread != read_size) {
            MFU_LOG(MFU_LOG_ERR, "Got a short read from input file %s", in_path);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* seek to correct spot in output file */
        seek_rc = mfu_lseek(out_path, out_fd, pos, SEEK_SET);
        if (seek_rc == (off_t)-1) {
            MFU_LOG(MFU_LOG_ERR, "Failed to seek in output file %s (%s)", out_path, strerror(errno));
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* write chunk to output */
        ssize_t nwrite = mfu_write(out_path, out_fd, buf, read_size);

        /* check for errors */
        if (nwrite < 0) {
            MFU_LOG(MFU_LOG_ERR, "Failed to write data to output file %s (%s)", out_path, strerror(errno));
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* check for short writes, nwrite is known to be non-negative here */
        if ((size_t) nwrite != read_size) {
            MFU_LOG(MFU_LOG_ERR, "Got a short write to output file %s", out_path);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* update our byte count for progress messages */
        stripe_prog_bytes += read_size;
        mfu_progress_update(&stripe_prog_bytes, stripe_prog);

        /* go on to the next chunk in this stripe, we assume we
         * read the whole chunk size, if we didn't it's because
         * the stripe is smaller or we're at the end of the file,
         * but in either case we're done so it doesn't hurt to
         * over estimate in this calculation */
        stripe_read += (uint64_t) chunk_size;
        chunk_id++;
    }

    /* flush and close the output file, a failure here means the data
     * we just wrote may not have reached storage, so treat it like
     * any other write error */
    if (mfu_fsync(out_path, out_fd) != 0) {
        MFU_LOG(MFU_LOG_ERR, "Failed to fsync output file %s (%s)", out_path, strerror(errno));
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    if (mfu_close(out_path, out_fd) != 0) {
        MFU_LOG(MFU_LOG_ERR, "Failed to close output file %s (%s)", out_path, strerror(errno));
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* the input file was only read, nothing to act on if close fails */
    mfu_close(in_path, in_fd);

    /* free buffer */
    mfu_free(&buf);
}
/* Create archive `archivefile` from the items in `flist` using libcircle
 * to copy file data.
 *
 * Steps, in order:
 *   - record the list, options and rank in the DTAR_* globals and reset
 *     the statistics,
 *   - open (creating if needed) the archive file,
 *   - compute for each local item the number of bytes it occupies in the
 *     archive and from that its offset, made global with an MPI_Scan,
 *   - write the header of every file, directory and symlink,
 *   - run a libcircle job with DTAR_enqueue_copy / DTAR_perform_copy,
 *   - sum the archive size over all ranks and print statistics on rank 0.
 *
 * Failure to open the archive file or to attach libarchive to it is
 * logged and ends the job through DTAR_abort. */
static void mfu_flist_archive_create_libcircle(mfu_flist flist, const char* archivefile, mfu_archive_options_t* opts)
{
    DTAR_flist = flist;
    DTAR_user_opts = *opts;

    MPI_Comm_rank(MPI_COMM_WORLD, &DTAR_rank);

    /* TODO: stripe the archive file if on parallel file system */

    /* init statistics */
    DTAR_statistics.total_dirs  = 0;
    DTAR_statistics.total_files = 0;
    DTAR_statistics.total_links = 0;
    DTAR_statistics.total_size  = 0;
    DTAR_statistics.total_bytes_copied = 0;

    time(&(DTAR_statistics.time_started));
    DTAR_statistics.wtime_started = MPI_Wtime();

    /* create the archive file */
    DTAR_writer.name = archivefile;
    DTAR_writer.flags = O_WRONLY | O_CREAT | O_CLOEXEC | O_LARGEFILE;
    DTAR_writer.fd_tar = open(archivefile, DTAR_writer.flags, 0664);
    if (DTAR_writer.fd_tar < 0) {
        /* without a valid descriptor nothing below can work,
         * so stop here rather than handing -1 to libarchive */
        MFU_LOG(MFU_LOG_ERR, "Failed to open archive file for writing: `%s'",
                archivefile);
        DTAR_abort(EXIT_FAILURE);
    }

    /* get number of items in our portion of the list */
    DTAR_count = mfu_flist_size(DTAR_flist);

    /* allocate memory for file sizes and offsets */
    uint64_t* fsizes = (uint64_t*) MFU_MALLOC(DTAR_count * sizeof(uint64_t));
    DTAR_offsets     = (uint64_t*) MFU_MALLOC(DTAR_count * sizeof(uint64_t));

    /* compute local offsets for each item and total
     * bytes we're contributing to the archive */
    uint64_t idx;
    uint64_t offset = 0;
    for (idx = 0; idx < DTAR_count; idx++) {
        /* assume the item takes no space */
        fsizes[idx] = 0;

        /* identify item type to compute its size in the archive */
        mfu_filetype type = mfu_flist_file_get_type(DTAR_flist, idx);
        if (type == MFU_TYPE_DIR || type == MFU_TYPE_LINK) {
            /* directories and symlinks only need the header */
            fsizes[idx] = DTAR_HDR_LENGTH;
        } else if (type == MFU_TYPE_FILE) {
            /* regular file requires a header, plus file content,
             * and things are packed into blocks of 512 bytes */
            uint64_t fsize = mfu_flist_file_get_size(DTAR_flist, idx);

            /* determine whether file size is integer multiple of 512 bytes */
            uint64_t rem = fsize % 512;
            if (rem == 0) {
                /* file content is multiple of 512 bytes, so perfect fit */
                fsizes[idx] = fsize + DTAR_HDR_LENGTH;
            } else {
                /* (fsize / 512 + 1) * 512 rounds the content up to the
                 * next 512-byte block, the remaining 3 blocks stand in
                 * for the header.
                 * NOTE(review): this only agrees with the branch above
                 * if DTAR_HDR_LENGTH is 3 * 512 = 1536 -- confirm */
                fsizes[idx] = (fsize / 512 + 4) * 512;
            }
        }

        /* increment our local offset for this item */
        DTAR_offsets[idx] = offset;
        offset += fsizes[idx];
    }

    /* execute scan to figure our global base offset in the archive file,
     * the scan result includes our own bytes so subtract them back out */
    uint64_t global_offset = 0;
    MPI_Scan(&offset, &global_offset, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
    global_offset -= offset;

    /* update offsets for each of our file to their global offset */
    for (idx = 0; idx < DTAR_count; idx++) {
        DTAR_offsets[idx] += global_offset;
    }

    /* create an archive */
    struct archive* ar = archive_write_new();

    archive_write_set_format_pax(ar);

    int r = archive_write_open_fd(ar, DTAR_writer.fd_tar);
    if (r != ARCHIVE_OK) {
        MFU_LOG(MFU_LOG_ERR, "archive_write_open_fd(): %s", archive_error_string(ar));
        DTAR_abort(EXIT_FAILURE);
    }

    /* write headers for our files */
    for (idx = 0; idx < DTAR_count; idx++) {
        mfu_filetype type = mfu_flist_file_get_type(DTAR_flist, idx);
        if (type == MFU_TYPE_FILE || type == MFU_TYPE_DIR || type == MFU_TYPE_LINK) {
            DTAR_write_header(ar, idx, DTAR_offsets[idx]);
        }
    }

    /* prepare libcircle */
    CIRCLE_init(0, NULL, CIRCLE_SPLIT_EQUAL | CIRCLE_CREATE_GLOBAL);
    CIRCLE_loglevel loglevel = CIRCLE_LOG_WARN;
    CIRCLE_enable_logging(loglevel);

    /* register callbacks */
    CIRCLE_cb_create(&DTAR_enqueue_copy);
    CIRCLE_cb_process(&DTAR_perform_copy);

    /* run the libcircle job to copy data into archive file */
    CIRCLE_begin();
    CIRCLE_finalize();

    /* compute total bytes copied */
    uint64_t archive_size = 0;
    MPI_Allreduce(&offset, &archive_size, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
    DTAR_statistics.total_size = archive_size;

    DTAR_statistics.wtime_ended = MPI_Wtime();
    time(&(DTAR_statistics.time_ended));

    /* print stats */
    double rel_time = DTAR_statistics.wtime_ended -
                      DTAR_statistics.wtime_started;
    if (DTAR_rank == 0) {
        char starttime_str[256];
        struct tm* localstart = localtime(&(DTAR_statistics.time_started));
        strftime(starttime_str, 256, "%b-%d-%Y, %H:%M:%S", localstart);

        char endtime_str[256];
        struct tm* localend = localtime(&(DTAR_statistics.time_ended));
        strftime(endtime_str, 256, "%b-%d-%Y, %H:%M:%S", localend);

        /* add two 512 blocks at the end */
        DTAR_statistics.total_size += 512 * 2;

        /* convert bandwidth to unit */
        double agg_rate_tmp;
        double agg_rate = (double) DTAR_statistics.total_size / rel_time;
        const char* agg_rate_units;
        mfu_format_bytes(agg_rate, &agg_rate_tmp, &agg_rate_units);

        MFU_LOG(MFU_LOG_INFO, "Started:    %s", starttime_str);
        MFU_LOG(MFU_LOG_INFO, "Completed:  %s", endtime_str);
        MFU_LOG(MFU_LOG_INFO, "Total archive size: %" PRIu64, DTAR_statistics.total_size);
        MFU_LOG(MFU_LOG_INFO, "Rate: %.3lf %s "
                "(%.3" PRIu64 " bytes in %.3lf seconds)",
                agg_rate_tmp, agg_rate_units, DTAR_statistics.total_size, rel_time);
    }

    /* clean up */
    mfu_free(&fsizes);
    mfu_free(&DTAR_offsets);

    /* close archive file */
    archive_write_free(ar);
    mfu_close(DTAR_writer.name, DTAR_writer.fd_tar);
}
/* Open `fname`, read chunk number `chunk_id` (1-based, `chunk_size` bytes
 * per chunk) into `chunk_buf`, and close the file again.
 *
 * The buffer is zeroed over its full `chunk_size` bytes before reading.
 * The number of bytes read must equal what `file_size` predicts for this
 * chunk: a full chunk, the partial tail of the file, or 0 when the chunk
 * starts at or beyond the end of the file.
 *
 * Returns 0 and stores the byte count in `*data_size` on success.
 * Returns -1 if the open, seek or read fails or if the byte count differs
 * from the expected one; `*data_size` is left untouched in that case. */
static int read_data(const char* fname, char* chunk_buf, uint64_t chunk_id,
                     uint64_t chunk_size, uint64_t file_size, uint64_t* data_size)
{
    assert(chunk_id > 0);

    /* byte range [first, limit) that this chunk covers in the file */
    uint64_t first = (chunk_id - 1) * chunk_size;
    uint64_t limit = chunk_id * chunk_size;

    /* start from a zeroed buffer */
    memset(chunk_buf, 0, chunk_size);

    int fd = mfu_open(fname, O_RDONLY);
    if (fd < 0) {
        return -1;
    }

    /* assume failure until every step has succeeded */
    int result = -1;

    if (mfu_lseek(fname, fd, (off_t)first, SEEK_SET) != (off_t) - 1) {
        ssize_t got = mfu_read(fname, fd, chunk_buf, chunk_size);
        if (got >= 0) {
            /* work out how many bytes this chunk should hold */
            ssize_t want;
            if (limit <= file_size) {
                /* chunk lies entirely within the file */
                want = (ssize_t) chunk_size;
            } else if (first < file_size) {
                /* chunk straddles the end of the file */
                want = (ssize_t) (file_size - first);
            } else {
                /* chunk starts at or past the end of the file */
                want = 0;
            }

            /* a mismatch means the file size has been changed */
            if (got == want) {
                *data_size = (uint64_t)got;
                result = 0;
            }
        }
    }

    /* the file is closed on success and failure alike */
    mfu_close(fname, fd);
    return result;
}
/* Return a file descriptor for `file`, going through the one-entry
 * descriptor cache.
 *
 * A cached descriptor is reused when both its name and its read/write
 * mode match the request; any other cached file is closed first. The new
 * file is opened with O_RDONLY when `read_flag` is non-zero, otherwise
 * with O_WRONLY | O_CREAT and DCOPY_DEF_PERMS_FILE, adding O_DIRECT when
 * DCOPY_user_opts.synchronous is set.
 *
 * When built with LUSTRE_SUPPORT and a non-zero grouplock ID is
 * configured, a group lock is requested on the new descriptor; failure
 * to obtain it is logged and otherwise ignored.
 *
 * Returns the value from mfu_open; the cache is only updated when that
 * value is not -1. */
int DCOPY_open_file(const char* file, int read_flag, DCOPY_file_cache_t* cache)
{
    /* see whether the cache can satisfy the request */
    if (cache->name != NULL) {
        if (cache->read == read_flag && strcmp(cache->name, file) == 0) {
            /* same file, same mode: hand back the cached descriptor */
            return cache->fd;
        }

        /* different file or mode: close the old file and delete the name */
        mfu_close(cache->name, cache->fd);
        mfu_free(&cache->name);
    }

    /* O_DIRECT is requested for both modes in synchronous operation */
    int extra = DCOPY_user_opts.synchronous ? O_DIRECT : 0;

    /* open the new file */
    int fd;
    if (read_flag) {
        fd = mfu_open(file, O_RDONLY | extra);
    } else {
        fd = mfu_open(file, O_WRONLY | O_CREAT | extra, DCOPY_DEF_PERMS_FILE);
    }

    /* nothing to cache if the open did not succeed */
    if (fd == -1) {
        return fd;
    }

    /* cache the file descriptor */
    cache->name = MFU_STRDUP(file);
    cache->fd   = fd;
    cache->read = read_flag;

#ifdef LUSTRE_SUPPORT
    /* Zero is an invalid ID for grouplock. */
    if (DCOPY_user_opts.grouplock_id != 0) {
        if (ioctl(fd, LL_IOC_GROUP_LOCK, DCOPY_user_opts.grouplock_id)) {
            MFU_LOG(MFU_LOG_ERR, "Failed to obtain grouplock with ID %d "
                    "on file `%s', ignoring this error: %s",
                    DCOPY_user_opts.grouplock_id,
                    file, strerror(errno));
        } else {
            MFU_LOG(MFU_LOG_INFO, "Obtained grouplock with ID %d "
                    "on file `%s', fd %d", DCOPY_user_opts.grouplock_id,
                    file, fd);
        }
    }
#endif

    return fd;
}