/* see: http://stackoverflow.com/a/3756466/965672 */
static off_t sync_and_drop_write_cache(int fd, off_t pos)
{
#ifdef SYNC_FILE_RANGE_WRITE
    off_t tmp = lseek(fd, 0, SEEK_CUR) & (~(SYNC_BUFFER_SIZE - 1));

    if (tmp == pos)
        return pos;

    // sync pos, async
    if (sync_file_range(fd, pos, SYNC_BUFFER_SIZE, SYNC_FILE_RANGE_WRITE) < 0)
        perror("sync_file_range");

    if (pos == 0)
        return tmp;

    pos -= SYNC_BUFFER_SIZE;

    // wait for previous sync to finish
    if (sync_file_range(fd, pos, SYNC_BUFFER_SIZE,
                        SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
                        SYNC_FILE_RANGE_WAIT_AFTER) < 0)
        perror("sync_file_range");

    // drop cache pages
    if (posix_fadvise(fd, pos, SYNC_BUFFER_SIZE, POSIX_FADV_DONTNEED) < 0)
        perror("posix_fadvise");

    return tmp;
#else
    return 0;
#endif
}
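/*
 * Hedged usage sketch (not from the source above): one plausible way to drive
 * sync_and_drop_write_cache() from a copy loop, so write-back of each chunk
 * overlaps the next read. copy_with_write_behind() is a hypothetical name,
 * and SYNC_BUFFER_SIZE is assumed to be a power-of-two chunk size defined
 * elsewhere, as the alignment mask in the helper requires.
 */
#include <stdio.h>
#include <unistd.h>

static void copy_with_write_behind(int in_fd, int out_fd)
{
    static char chunk[SYNC_BUFFER_SIZE];
    off_t synced = 0;
    ssize_t n;

    while ((n = read(in_fd, chunk, sizeof(chunk))) > 0) {
        /* Short writes are ignored here for brevity. */
        if (write(out_fd, chunk, (size_t)n) != n) {
            perror("write");
            return;
        }
        /* Kick off write-back for each newly completed chunk and drop
         * the already-synced pages behind it. */
        synced = sync_and_drop_write_cache(out_fd, synced);
    }
}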
static void tar_writeback_barrier(struct fileinlist *files, struct pkginfo *pkg)
{
    struct fileinlist *cfile;

    for (cfile = files; cfile; cfile = cfile->next) {
        struct filenamenode *usenode;
        const char *usename;
        int fd;

        if (!(cfile->namenode->flags & fnnf_deferred_fsync))
            continue;

        usenode = namenodetouse(cfile->namenode, pkg);
        usename = usenode->name + 1; /* Skip the leading '/'. */

        setupfnamevbs(usename);

        fd = open(fnamenewvb.buf, O_WRONLY);
        if (fd < 0)
            ohshite(_("unable to open '%.255s'"), fnamenewvb.buf);
        sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE);
        if (close(fd))
            ohshite(_("error closing/writing `%.255s'"), fnamenewvb.buf);
    }
}
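/*
 * Hedged note (not from the source above): with nbytes == 0 the range extends
 * to end of file, and SYNC_FILE_RANGE_WAIT_BEFORE alone blocks only until
 * write-out that was *already submitted* for those pages completes; it does
 * not start any new write-out. That makes this loop a cheap barrier for files
 * whose write-back was initiated earlier, not a data-integrity sync on its own.
 */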
static int
pre_sync_fname(const char *fname, bool isdir, const char *progname)
{
    int fd;

    fd = open(fname, O_RDONLY | PG_BINARY);

    if (fd < 0)
    {
        if (errno == EACCES || (isdir && errno == EISDIR))
            return 0;
        fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
                progname, fname, strerror(errno));
        return -1;
    }

    /*
     * We do what pg_flush_data() would do in the backend: prefer to use
     * sync_file_range, but fall back to posix_fadvise.  We ignore errors
     * because this is only a hint.
     */
#if defined(HAVE_SYNC_FILE_RANGE)
    (void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);
#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
    (void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
#else
#error PG_FLUSH_DATA_WORKS should not have been defined
#endif

    (void) close(fd);
    return 0;
}
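/*
 * Hedged usage sketch (not part of PostgreSQL): pre_sync_fname() is the kind
 * of helper a caller would invoke while walking a directory tree, so that
 * write-back for every file is already in flight before a later blocking
 * fsync pass. pre_sync_dir() is a hypothetical name and error handling is
 * abbreviated; it is only similar in spirit to the real directory walker.
 */
#include <dirent.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static void pre_sync_dir(const char *dir, const char *progname)
{
    DIR *d = opendir(dir);
    struct dirent *de;
    char path[4096];

    if (d == NULL)
        return;
    while ((de = readdir(d)) != NULL) {
        if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
            continue;
        snprintf(path, sizeof(path), "%s/%s", dir, de->d_name);
        /* Hint the kernel to start write-back; errors are ignored. */
        (void) pre_sync_fname(path, false, progname);
    }
    closedir(d);
}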
virtual int setup() override {
    init_paths();

    // Create test directory A.
    string dir_path = mnt_dir_ + "/" TEST_DIR_A;
    int res = mkdir(dir_path.c_str(), 0777);
    if (res < 0) {
        return -1;
    }

    // Create file foo in TEST_DIR_A.
    const int fd_foo = open(foo_path.c_str(), O_RDWR | O_CREAT, TEST_FILE_PERMS);
    if (fd_foo < 0) {
        return -1;
    }
    const int fd_foo_backup =
        open(foo_backup_path.c_str(), O_RDWR | O_CREAT, TEST_FILE_PERMS);
    if (fd_foo_backup < 0) {
        return -1;
    }

    // Write some contents to the file.
    if (WriteData(fd_foo, 0, 4096) < 0) {
        return -2;
    }
    // Write some contents to the backup file (for verifying md5sum in
    // check_test).
    if (WriteData(fd_foo_backup, 0, 4096) < 0) {
        return -2;
    }

    // Sync the file and the backup file.
    if (fsync(fd_foo) < 0) {
        return -1;
    }
    if (fsync(fd_foo_backup) < 0) {
        return -1;
    }

    // Write more contents at a different offset.
    if (WriteData(fd_foo, 4096, 4096) < 0) {
        return -2;
    }
    if (WriteData(fd_foo_backup, 4096, 4096) < 0) {
        return -2;
    }

    // sync_file_range the foo file.
    if (sync_file_range(fd_foo, 4096, 4096, 0) < 0) {
        return -3;
    }
    // fsync the entire backup file.
    if (fsync(fd_foo_backup) < 0) {
        return -1;
    }

    close(fd_foo);
    close(fd_foo_backup);
    return 0;
}
static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs,
                             const struct iovec *iov,
                             int iovcnt, off_t offset)
{
    ssize_t ret;

#ifdef CONFIG_PREADV
    ret = pwritev(fs->fd, iov, iovcnt, offset);
#else
    int err = lseek(fs->fd, offset, SEEK_SET);
    if (err == -1) {
        return err;
    } else {
        ret = writev(fs->fd, iov, iovcnt);
    }
#endif
#ifdef CONFIG_SYNC_FILE_RANGE
    if (ret > 0 && ctx->export_flags & V9FS_IMMEDIATE_WRITEOUT) {
        /*
         * Initiate a writeback. This is not a data integrity sync.
         * We want to ensure that we don't leave dirty pages in the cache
         * after write when writeout=immediate is specified.
         */
        sync_file_range(fs->fd, offset, ret,
                        SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE);
    }
#endif
    return ret;
}
void File::WriteOut(unsigned int pageno)
{
    if (m_fd != -1) {
        // On modern Linux, MS_ASYNC does not start any I/O itself; the
        // following sync_file_range() is what actually schedules write-back
        // (assuming PAGE == 4096, matching the hard-coded offsets below).
        msync(Page(pageno), PAGE, MS_ASYNC);
        sync_file_range(m_fd, pageno * 4096ull, 4096, SYNC_FILE_RANGE_WRITE);
    }
}
int compat_sync_file_range(int fd, off64_t offset, off64_t nbytes,
                           unsigned int flags)
{
#ifdef HAVE_SYNC_FILE_RANGE
    return sync_file_range(fd, offset, nbytes, flags);
#else
    return fdatasync(fd);
#endif
}
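/*
 * Hedged example (not from the source above): how a caller might use the
 * compat wrapper to start write-back for a region it just wrote. On platforms
 * without sync_file_range() this silently degrades to a full, blocking
 * fdatasync(), so it gives no latency guarantee. write_and_kick() is a
 * hypothetical name, and SYNC_FILE_RANGE_WRITE is assumed to be defined
 * (callers of such wrappers typically guard or define it themselves).
 */
#include <unistd.h>

static int write_and_kick(int fd, const void *buf, size_t len, off64_t off)
{
    ssize_t n = pwrite(fd, buf, len, (off_t)off);

    if (n < 0)
        return -1;
    /* Initiate write-back for exactly the bytes written; do not wait. */
    return compat_sync_file_range(fd, off, n, SYNC_FILE_RANGE_WRITE);
}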
static inline void fd_writeback_init(int fd)
{
#if defined(SYNC_FILE_RANGE_WRITE)
    sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);
#elif defined(HAVE_POSIX_FADVISE)
    posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
#endif
}
int write_junk(const char *fname, int flags, int sync_options,
               uint64_t file_size)
{
    int fd, len;
    uint64_t offset, generation = 0;
    char *buf;

    len = posix_memalign((void **)&buf, bufsize, bufsize);
    if (len) {
        errno = len;
        perror("alloc");
        return 66;
    }

    fd = open(fname, flags | O_WRONLY);
    if (fd < 0) {
        perror(fname);
        return 64;
    }

    while (1) {
        len = snprintf(buf, bufsize - 1, "%d - %"PRIu64, getpid(),
                       generation++);
        if (flags & O_DIRECT) {
            len = bufsize;
            offset = get_randnum_align(0, file_size - len, bufsize);
        } else {
            offset = get_randnum(0, file_size - len);
        }

        if (pwrite(fd, buf, len, offset) < 0) {
            perror("pwrite");
            close(fd);
            free(buf);
            return 65;
        }
        if ((sync_options & SYNC_RANGE) &&
            sync_file_range(fd, offset, len,
                            SYNC_FILE_RANGE_WAIT_BEFORE |
                            SYNC_FILE_RANGE_WRITE |
                            SYNC_FILE_RANGE_WAIT_AFTER) < 0) {
            perror("sync_file_range");
            close(fd);
            free(buf);
            return 67;
        }
        if ((sync_options & SYNC_FILE) && fsync(fd)) {
            perror("fsync");
            close(fd);
            free(buf);
            return 68;
        }
    }
    return 0;
}
static int pfile_async_force(stasis_handle_t *h)
{
    TICK(force_range_hist);
    pfile_impl *impl = h->impl;
#ifdef HAVE_SYNC_FILE_RANGE
    // A stop offset of zero syncs to EOF.
    DEBUG("pfile_async_force calling sync_file_range\n");
    fflush(stdout);
    int ret = sync_file_range(impl->fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE);
    ret |= sync_file_range(impl->fd, 0, 0, SYNC_FILE_RANGE_WRITE);
    if (ret) {
        int error = errno;
        assert(ret == -1);
        // With the possible exceptions of ENOMEM and ENOSPC, all of the sync
        // errors are unrecoverable.
        h->error = EBADF;
        ret = error;
    }
#else
#ifdef HAVE_FDATASYNC
    DEBUG("pfile_async_force() is calling fdatasync()\n");
    fdatasync(impl->fd);
#else
    DEBUG("pfile_async_force() is calling fsync()\n");
    fsync(impl->fd);
#endif
    int ret = 0;
#endif
#ifdef HAVE_POSIX_FADVISE
    if (impl->sequential) {
        int err = posix_fadvise(impl->fd, 0, 0, POSIX_FADV_DONTNEED);
        if (err)
            perror("Attempt to pass POSIX_FADV_DONTNEED (for a range of a file) to kernel failed");
    }
#endif
    TOCK(force_range_hist);
    return ret;
}
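/*
 * Hedged note (not from the source above): the WAIT_BEFORE call followed by
 * the WRITE call is roughly equivalent to a single
 * sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE),
 * which first waits for any write-out already in flight for the range and
 * then initiates write-out of the remaining dirty pages. Neither form waits
 * for the newly initiated write-out to complete, so this is not a durability
 * barrier by itself.
 */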
/*
 * __posix_file_sync_nowait --
 *     POSIX fsync.
 */
static int
__posix_file_sync_nowait(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
    WT_DECL_RET;
    WT_FILE_HANDLE_POSIX *pfh;
    WT_SESSION_IMPL *session;

    session = (WT_SESSION_IMPL *)wt_session;
    pfh = (WT_FILE_HANDLE_POSIX *)file_handle;

    WT_SYSCALL_RETRY(sync_file_range(pfh->fd,
        (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret);
    if (ret == 0)
        return (0);

    WT_RET_MSG(session, ret,
        "%s: handle-sync-nowait: sync_file_range", file_handle->name);
}
static void pfForceRangePageFile(stasis_page_handle_t *h, lsn_t start, lsn_t stop)
{
    if (pageFile_isDurable) {
#ifdef HAVE_SYNC_FILE_RANGE
        int ret = sync_file_range(stable, start, stop,
                                  SYNC_FILE_RANGE_WAIT_BEFORE |
                                  SYNC_FILE_RANGE_WRITE |
                                  SYNC_FILE_RANGE_WAIT_AFTER);
        assert(!ret);
#else
#ifdef HAVE_FDATASYNC
        fdatasync(stable);
#else
        fsync(stable);
#endif
#endif
    }
}
/*
 * __wt_fsync_async --
 *     Flush a file handle and don't wait for the result.
 */
int
__wt_fsync_async(WT_SESSION_IMPL *session, WT_FH *fh)
{
#ifdef HAVE_SYNC_FILE_RANGE
    WT_DECL_RET;

    WT_RET(__wt_verbose(
        session, WT_VERB_FILEOPS, "%s: sync_file_range", fh->name));

    if ((ret = sync_file_range(fh->fd,
        (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE)) == 0)
        return (0);
    WT_RET_MSG(session, ret, "%s: sync_file_range", fh->name);
#else
    WT_UNUSED(session);
    WT_UNUSED(fh);
    return (0);
#endif
}
int
main(void)
{
    const int fd = -1;
    const off64_t offset = 0xdeadbeefbadc0ded;
    const off64_t nbytes = 0xfacefeedcafef00d;
    const unsigned int flags = -1;

    int rc = sync_file_range(fd, offset, nbytes, flags);
    printf("%s(%d, SYNC_FILE_RANGE_WAIT_BEFORE"
           "|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER"
           "|0xfffffff8, %lld, %lld) = %d %s (%m)\n",
           "sync_file_range2", fd, (long long) offset,
           (long long) nbytes, rc, errno2name());

    puts("+++ exited with 0 +++");
    return 0;
}
static void close_was_called(int fd)
{
    struct fd_status *fds;

    if (pagecache_max_bytes == 0)
        return;

    fds = get_fd_status(fd);
    if (fds->bytes_written > 0) {
        if (time(NULL) - fds->seconds > wait_secs) {
            /* >= /proc/sys/vm/dirty_writeback_centisecs */
            sync_file_range(fd, 0, LONG_MAX,
                            SYNC_FILE_RANGE_WRITE |
                            SYNC_FILE_RANGE_WAIT_AFTER);
            pagecache_size_write -= fds->bytes_written;
            fds->bytes_written = 0;
#ifdef COUNT_READS
        }
    }
    /* When counting reads, always drop the cached pages on close. */
    posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
#else
            /* Otherwise, drop pages only after flushed writes. */
            posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
        }
    }
#endif
}
void *compact_func(void *arg)
{
    operation_stats *stats = (operation_stats *) arg;
    char **data;
    struct timeval start;
    struct drand48_data ctx;
    int *input_x;
    DFILE *new_dfiles;
    char **old_fnames;
    int *old_fds;
    int i;
    long long read_size = data_block_size * compact_read_blocks;
    char *zbuf = (char *) malloc(data_block_size * 3);
    int zbuf_len = data_block_size * 3;
    double usecs_per_loop = 1000000.0 * secs_per_loop_per_thr;
    struct timeval rate_start;

    now(&rate_start);

    old_fds = (int *) malloc(sizeof(int) * (fanout + 1));
    old_fnames = (char **) malloc(sizeof(char *) * (fanout + 1));

    data = (char **) malloc(sizeof(char *) * (fanout + 2));
    for (i = 0; i < (fanout + 2); ++i)
        assert(!posix_memalign((void **) &data[i], data_block_size, read_size));

    input_x = (int *) malloc(sizeof(int) * (fanout + 1));
    new_dfiles = (DFILE *) malloc(sizeof(DFILE) * (fanout + 1));

    init_rand_ctx(&ctx);

    while (!shutdown) {
        longlong read_offset = 0;
        longlong output_offset = 0;
        int output_x = 0;
        longlong bytes_read = 0;
        longlong bytes_written = 0;
        /* When a compaction uses "cached reads" all of the compaction reads
           use the same input file to simulate most of the reads hitting in
           the OS filesystem cache. */
        int uncached_reads = rand_choose(&ctx, 100) < compact_read_miss_pct;

        for (i = 0; i < (fanout + 1); ++i) {
            int x = lock_random_file(&ctx);
            input_x[i] = x;
            set_compact_options(dfiles[x].dfile_fd);
        }

        now(&start);

        i = get_recycled_files(new_dfiles, fanout + 1);
        for (; i < (fanout + 1); ++i)
            open_file(1, &new_dfiles[i], NULL);

        while (read_offset < data_file_size) {
            for (i = 0; i < (fanout + 1); ++i) {
                int b;
                int read_fd_idx = uncached_reads ? input_x[i] : 0;

                check_pread(dfiles[read_fd_idx].dfile_fd, data[i],
                            read_size, read_offset, "compact",
                            dfiles[read_fd_idx].dfile_fname);
                if (uncached_reads)
                    bytes_read += read_size;

                for (b = 0; b < compact_read_blocks; ++b) {
                    page_check_checksum(data[i] + (b * data_block_size));
                    if (compress_level)
                        decompress_page(zbuf, zbuf_len);
                }
            }
            read_offset += read_size;

            for (i = 0; i < (fanout + 1); ++i) {
                int b;

                for (b = 0; b < compact_read_blocks; ++b) {
                    page_write_checksum(data[i] + (b * data_block_size), i + b);
                    if (compress_level)
                        compress_page(compressed_page, compressed_page_len,
                                      zbuf, zbuf_len);
                }

                check_write(new_dfiles[output_x].dfile_fd, data[i],
                            read_size, "compaction write");
                bytes_written += read_size;
                output_offset += read_size;

                if (output_offset >= data_file_size) {
                    new_dfiles[output_x].dfile_len = output_offset;
                    if (use_sync_file_range) {
                        sync_file_range(new_dfiles[output_x].dfile_fd, 0, 0,
                                        SYNC_FILE_RANGE_WRITE);
                    } else {
                        sync_after_writes(new_dfiles[output_x].dfile_fd);
                    }
                    ++output_x;
                    output_offset = 0;
                }
            }
        }

        if (use_sync_file_range)
            for (i = 0; i < (fanout + 1); ++i)
                sync_after_writes(new_dfiles[i].dfile_fd);

        stats_report(stats, &start, &ctx, bytes_read, bytes_written);

        assert(output_x == (fanout + 1));

        pthread_mutex_lock(&dfiles_mutex);
        for (i = 0; i < (fanout + 1); ++i) {
            DFILE *old_f = &dfiles[input_x[i]];
            DFILE *new_f = &new_dfiles[i];

            old_fnames[i] = old_f->dfile_fname;
            old_fds[i] = old_f->dfile_fd;
            *old_f = *new_f;
        }
        pthread_mutex_unlock(&dfiles_mutex);

        handle_old_files(fanout + 1, old_fds, old_fnames);

        if (write_bytes_per_second) {
            struct timeval rate_cur;
            long usecs_elapsed;

            now(&rate_cur);
            usecs_elapsed = now_minus_then_usecs(&rate_cur, &rate_start);
            if (usecs_elapsed > 0 && usecs_elapsed < usecs_per_loop) {
                usleep(usecs_per_loop - usecs_elapsed);
                now(&rate_start);
            } else {
                rate_start = rate_cur;
            }
        }
    }

    for (i = 0; i < (fanout + 2); ++i)
        free(data[i]);
    free(data);
    free(input_x);
    free(new_dfiles);
    free(old_fnames);
    free(old_fds);
    free(zbuf);

    return NULL;
}
/*
 * Randomly write inside of a file, either creating a sparse file or prealloc
 * the file and randomly write within it, depending on the prealloc flag
 */
static int test_three(int *max_blocks, int prealloc, int rand_fsync,
                      int do_sync, int drop_caches)
{
    int size = (random() % 2048) + 4;
    int blocks = size / 2;
    int sync_block = blocks / 2;
    int rand_sync_interval = (random() % blocks) + 1;
    int character = (random() % 126) + 33;

    if (prealloc && fallocate(test_fd, 0, 0, size * 4096)) {
        fprintf(stderr, "Error fallocating %d (%s)\n", errno,
                strerror(errno));
        return 1;
    }

    if (prealloc)
        *max_blocks = size;

    memset(buf, character, 4096);
    while (blocks--) {
        int block = (random() % size);

        if ((block + 1) > *max_blocks)
            *max_blocks = block + 1;

        if (rand_fsync && !(blocks % rand_sync_interval)) {
            if (fsync(test_fd)) {
                fprintf(stderr, "Fsync failed, test results "
                        "will be invalid: %d\n", errno);
                return 1;
            }
        }

        /* Force a transaction commit in between just for fun */
        if (blocks == sync_block && (do_sync || drop_caches)) {
            if (do_sync)
                sync();
            else
                sync_file_range(test_fd, 0, 0,
                                SYNC_FILE_RANGE_WRITE |
                                SYNC_FILE_RANGE_WAIT_AFTER);

            if (drop_caches) {
                close(test_fd);
                drop_all_caches();
                test_fd = open(fname, O_RDWR);
                if (test_fd < 0) {
                    test_fd = 0;
                    fprintf(stderr, "Error re-opening file: %d\n",
                            errno);
                    return 1;
                }
            }
        }

        if (pwrite(test_fd, buf, 4096, block * 4096) < 4096) {
            fprintf(stderr, "Short write %d\n", errno);
            return 1;
        }
    }

    return 0;
}
int main(void)
{
    return sync_file_range(0, 0, 1024, 0);
}
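/*
 * Hedged note: a bare link test like the one above is typically what a
 * build-system probe (e.g. an Autoconf function check or CMake's
 * check_function_exists) compiles in order to define macros such as
 * HAVE_SYNC_FILE_RANGE, which guard the calls elsewhere in this collection.
 */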
/*
 * __wt_block_write_off --
 *     Write a buffer into a block, returning the block's addr/size and
 * checksum.
 */
int
__wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
    WT_ITEM *buf, off_t *offsetp, uint32_t *sizep, uint32_t *cksump,
    int data_cksum, int locked)
{
    WT_BLOCK_HEADER *blk;
    WT_DECL_RET;
    WT_FH *fh;
    off_t offset;
    uint32_t align_size;

    blk = WT_BLOCK_HEADER_REF(buf->mem);
    fh = block->fh;

    /* Buffers should be aligned for writing. */
    if (!F_ISSET(buf, WT_ITEM_ALIGNED)) {
        WT_ASSERT(session, F_ISSET(buf, WT_ITEM_ALIGNED));
        WT_RET_MSG(session, EINVAL,
            "direct I/O check: write buffer incorrectly allocated");
    }

    /*
     * Align the size to an allocation unit.
     *
     * The buffer must be big enough for us to zero to the next allocsize
     * boundary, this is one of the reasons the btree layer must find out
     * from the block-manager layer the maximum size of the eventual write.
     */
    align_size = (uint32_t)WT_ALIGN(buf->size, block->allocsize);
    if (align_size > buf->memsize) {
        WT_ASSERT(session, align_size <= buf->memsize);
        WT_RET_MSG(session, EINVAL,
            "buffer size check: write buffer incorrectly allocated");
    }

    /* Zero out any unused bytes at the end of the buffer. */
    memset((uint8_t *)buf->mem + buf->size, 0, align_size - buf->size);

    /*
     * Set the disk size so we don't have to incrementally read blocks
     * during salvage.
     */
    blk->disk_size = align_size;

    /*
     * Update the block's checksum: if our caller specifies, checksum the
     * complete data, otherwise checksum the leading WT_BLOCK_COMPRESS_SKIP
     * bytes.  The assumption is applications with good compression support
     * turn off checksums and assume corrupted blocks won't decompress
     * correctly.  However, if compression failed to shrink the block, the
     * block wasn't compressed, in which case our caller will tell us to
     * checksum the data to detect corruption.  If compression succeeded,
     * we still need to checksum the first WT_BLOCK_COMPRESS_SKIP bytes
     * because they're not compressed, both to give salvage a quick test
     * of whether a block is useful and to give us a test so we don't lose
     * the first WT_BLOCK_COMPRESS_SKIP bytes without noticing.
     */
    blk->flags = 0;
    if (data_cksum)
        F_SET(blk, WT_BLOCK_DATA_CKSUM);
    blk->cksum = 0;
    blk->cksum = __wt_cksum(
        buf->mem, data_cksum ? align_size : WT_BLOCK_COMPRESS_SKIP);

    if (!locked)
        __wt_spin_lock(session, &block->live_lock);
    ret = __wt_block_alloc(session, block, &offset, (off_t)align_size);
    if (!locked)
        __wt_spin_unlock(session, &block->live_lock);
    WT_RET(ret);

#if defined(HAVE_POSIX_FALLOCATE) || defined(HAVE_FTRUNCATE)
    /*
     * Extend the file in chunks.  We aren't holding a lock and we'd prefer
     * to limit the number of threads extending the file at the same time,
     * so choose the one thread that's crossing the extended boundary.  We
     * don't extend newly created files, and it's theoretically possible we
     * might wait so long our extension of the file is passed by another
     * thread writing single blocks, that's why there's a check in case the
     * extended file size becomes too small: if the file size catches up,
     * every thread will try to extend it.
     */
    if (fh->extend_len != 0 &&
        (fh->extend_size <= fh->size ||
        (offset + fh->extend_len <= fh->extend_size &&
        offset + fh->extend_len + align_size >= fh->extend_size))) {
        fh->extend_size = offset + fh->extend_len * 2;
#if defined(HAVE_POSIX_FALLOCATE)
        if ((ret = posix_fallocate(
            fh->fd, offset, fh->extend_len * 2)) != 0)
            WT_RET_MSG(
                session, ret, "%s: posix_fallocate", fh->name);
#elif defined(HAVE_FTRUNCATE)
        if ((ret = ftruncate(fh->fd, fh->extend_size)) != 0)
            WT_RET_MSG(session, ret, "%s: ftruncate", fh->name);
#endif
    }
#endif

    if ((ret =
        __wt_write(session, fh, offset, align_size, buf->mem)) != 0) {
        if (!locked)
            __wt_spin_lock(session, &block->live_lock);
        WT_TRET(
            __wt_block_off_free(session, block, offset, align_size));
        if (!locked)
            __wt_spin_unlock(session, &block->live_lock);
        WT_RET(ret);
    }

#ifdef HAVE_SYNC_FILE_RANGE
    /*
     * Optionally schedule writes for dirty pages in the system buffer
     * cache.
     */
    if (block->os_cache_dirty_max != 0 &&
        (block->os_cache_dirty += align_size) > block->os_cache_dirty_max) {
        block->os_cache_dirty = 0;
        if ((ret = sync_file_range(fh->fd,
            (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE)) != 0)
            WT_RET_MSG(
                session, ret, "%s: sync_file_range", block->name);
    }
#endif
#ifdef HAVE_POSIX_FADVISE
    /* Optionally discard blocks from the system buffer cache. */
    if (block->os_cache_max != 0 &&
        (block->os_cache += align_size) > block->os_cache_max) {
        block->os_cache = 0;
        if ((ret = posix_fadvise(fh->fd,
            (off_t)0, (off_t)0, POSIX_FADV_DONTNEED)) != 0)
            WT_RET_MSG(
                session, ret, "%s: posix_fadvise", block->name);
    }
#endif

    WT_CSTAT_INCR(session, block_write);
    WT_CSTAT_INCRV(session, block_byte_write, align_size);

    WT_VERBOSE_RET(session, write,
        "off %" PRIuMAX ", size %" PRIu32 ", cksum %" PRIu32,
        (uintmax_t)offset, align_size, blk->cksum);

    *offsetp = offset;
    *sizep = align_size;
    *cksump = blk->cksum;

    return (ret);
}
int main(int argc, char *argv[])
{
    int fd;
    int sem;
    int nrpages = 1;
    int ret = 0;
    int tmp = 0;
    int offset = 0;
    char c;
    char *filename;
    char *actype;
    char *onerror;
    char *p;
    pid_t pid;
    int wait_status;
    uint64_t pflag;
    struct sembuf sembuf;
    struct pagestat pgstat;

    if (argc != 5) {
        printf("Usage: %s filename nrpages accesstype onerror\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    filename = argv[1];
    nrpages = strtol(argv[2], NULL, 10);
    actype = argv[3];
    onerror = argv[4];
    DEB("filename = %s, nrpages = %d, actype = %s, onerror = %s\n",
        filename, nrpages, actype, onerror);

    if (strcmp(onerror, "onerror") == 0)
        offset = 0;
    else
        offset = PS;

    sem = create_and_init_semaphore();
    fd = open_check(filename, O_RDWR, 0);
    tmp = pread(fd, rbuf, nrpages * PS, 0);
    DEB("parent first read %d [%c,%c]\n", tmp, rbuf[0], rbuf[PS]);
    get_semaphore(sem, &sembuf);

    if ((pid = fork()) == 0) {
        get_semaphore(sem, &sembuf); /* wait parent to dirty page */
        p = mmap_check((void *)REFADDR, nrpages * PS,
                       PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
        if (p != (void *)REFADDR)
            err("mmap");
        if (nrpages == 1) {
            DEB("child read (after dirty) [%c]\n", p[0]);
#ifdef DEBUG
            get_pagestat(p, &pgstat);
#endif
        } else {
            DEB("child read (after dirty) [%c,%c]\n", p[0], p[PS]);
#ifdef DEBUG
            get_pagestat(p, &pgstat);
            get_pagestat(p + PS, &pgstat);
#endif
        }
        DEB("child hwpoison to vaddr %p\n", p);
        madvise(&p[0], PS, 100); /* hwpoison */
        put_semaphore(sem, &sembuf);
        get_semaphore(sem, &sembuf);
        DEB("child terminated\n");
        put_semaphore(sem, &sembuf);
        get_pflags(pgstat.pfn, &pflag, 1);
        exit(EXIT_SUCCESS);
    } else {
        DEB("parent dirty\n");
        usleep(1000);
        memset(wbuf, 49, nrpages * PS);
        pwrite(fd, wbuf, nrpages * PS, 0);
        tmp = pread(fd, rbuf, nrpages * PS, 0);
        DEB("parent second read (after dirty) %d [%c,%c]\n",
            tmp, rbuf[0], rbuf[PS]);
        put_semaphore(sem, &sembuf); /* kick child to inject error */
        get_semaphore(sem, &sembuf); /* pagecache should be hwpoison */
        DEB("parent check\n");
        if (strcmp(actype, "read") == 0) {
            tmp = pread(fd, rbuf, PS, offset);
            if (tmp < 0)
                DEB("parent first read failed.\n");
            tmp = pread(fd, rbuf, PS, offset);
            DEB("parent read after hwpoison %d [%c,%c]\n",
                tmp, rbuf[0], rbuf[PS]);
            if (tmp < 0) {
                ret = -1;
                perror("read");
            } else {
                ret = 0;
            }
        } else if (strcmp(actype, "writefull") == 0) {
            memset(wbuf, 50, nrpages * PS);
            tmp = pwrite(fd, wbuf, PS, offset);
            tmp = pwrite(fd, wbuf, PS, offset);
            DEB("parent write after hwpoison %d\n", tmp);
            if (tmp < 0) {
                ret = -1;
                perror("writefull");
            } else {
                ret = 0;
            }
        } else if (strcmp(actype, "writepart") == 0) {
            memset(wbuf, 50, nrpages * PS);
            tmp = pwrite(fd, wbuf, PS / 2, offset);
            tmp = pwrite(fd, wbuf, PS / 2, offset);
            DEB("parent write after hwpoison %d\n", tmp);
            if (tmp < 0) {
                ret = -1;
                perror("writepart");
            } else {
                ret = 0;
            }
        } else if (strcmp(actype, "fsync") == 0) {
            ret = fsync(fd);
            ret = fsync(fd);
            DEB("parent fsync after hwpoison [ret %d]\n", ret);
            if (ret)
                perror("fsync");
        } else if (strcmp(actype, "sync_range_write") == 0) {
            ret = sync_file_range(fd, offset, PS, SYNC_FILE_RANGE_WRITE);
            ret = sync_file_range(fd, offset, PS, SYNC_FILE_RANGE_WRITE);
            if (ret)
                perror("sync_range_write");
        } else if (strcmp(actype, "sync_range_wait") == 0) {
            ret = sync_file_range(fd, offset, PS,
                                  SYNC_FILE_RANGE_WAIT_BEFORE);
            ret = sync_file_range(fd, offset, PS,
                                  SYNC_FILE_RANGE_WAIT_BEFORE);
            if (ret)
                perror("sync_range_wait");
        } else if (strcmp(actype, "mmapread") == 0) {
            /*
             * If mmap access failed, this program should be
             * terminated by segmentation fault with non-zero
             * returned value. So we don't set ret here.
             */
            p = mmap_check((void *)REFADDR, nrpages * PS,
                           PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
            if (p != (void *)REFADDR)
                err("mmap");
            c = p[offset];
            DEB("parent mmap() read after hwpoison [%c]\n", p[offset]);
        } else if (strcmp(actype, "mmapwrite") == 0) {
            p = mmap_check((void *)REFADDR, nrpages * PS,
                           PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
            if (p != (void *)REFADDR)
                err("mmap");
            memset(&p[offset], 50, PS);
            DEB("parent mmap() write after hwpoison [%c]\n", p[offset]);
        }
    }

    put_semaphore(sem, &sembuf);
    waitpid(pid, &wait_status, 0);
    if (!WIFEXITED(wait_status))
        err("waitpid");
    delete_semaphore(sem);
    DEB("parent exit %d.\n", ret);
    return ret;
}
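/*
 * Hedged capstone sketch (not from any source above): the write-back pattern
 * most snippets in this collection build on, in one self-contained helper.
 * Step 1 initiates asynchronous write-out, step 2 waits for it to complete,
 * step 3 hints the kernel to drop the now-clean pages. flush_and_drop() is a
 * hypothetical name. Note that sync_file_range() never flushes file metadata,
 * so this is not a substitute for fsync() where durability is required.
 */
#define _GNU_SOURCE /* sync_file_range() is Linux-specific */
#include <fcntl.h>
#include <unistd.h>

static int flush_and_drop(int fd, off_t off, off_t len)
{
    /* 1: start write-out of dirty pages in the range without blocking. */
    if (sync_file_range(fd, off, len, SYNC_FILE_RANGE_WRITE) < 0)
        return -1;
    /* 2: wait for the write-out initiated above (and any other) to finish. */
    if (sync_file_range(fd, off, len,
                        SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
                        SYNC_FILE_RANGE_WAIT_AFTER) < 0)
        return -1;
    /* 3: advise the kernel the clean pages can be reclaimed; returns an
     * error number (not errno) on failure, passed through to the caller. */
    return posix_fadvise(fd, off, len, POSIX_FADV_DONTNEED);
}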