/*
 * heap_init -- initializes the heap
 *
 * If successful, the function returns zero. Otherwise an error number is returned.
 */
int
heap_init(PMEMobjpool *pop)
{
	if (pop->heap_size < HEAP_MIN_SIZE)
		return EINVAL;

	struct heap_layout *layout = heap_get_layout(pop);

	heap_write_header(&layout->header, pop->heap_size);
	pmem_msync(&layout->header, sizeof (struct heap_header));

	int zones = heap_max_zone(pop->heap_size);
	for (int i = 0; i < zones; ++i) {
		memset(&layout->zones[i].header, 0,
			sizeof (layout->zones[i].header));
		memset(&layout->zones[i].chunk_headers, 0,
			sizeof (layout->zones[i].chunk_headers));
		pmem_msync(&layout->zones[i].header,
			sizeof (layout->zones[i].header));
		pmem_msync(&layout->zones[i].chunk_headers,
			sizeof (layout->zones[i].chunk_headers));
	}

	return 0;
}
/*
 * pmemlog_persist -- (internal) persist data, then metadata
 *
 * On entry, the write lock should be held.
 */
static void
pmemlog_persist(PMEMlogpool *plp, uint64_t new_write_offset)
{
	uint64_t old_write_offset = le64toh(plp->write_offset);
	size_t length = new_write_offset - old_write_offset;

	/* unprotect the log space range (debug version only) */
	RANGE_RW(plp->addr + old_write_offset, length);

	/* persist the data */
	if (plp->is_pmem)
		pmem_drain();	/* data already flushed */
	else
		pmem_msync(plp->addr + old_write_offset, length);

	/* protect the log space range (debug version only) */
	RANGE_RO(plp->addr + old_write_offset, length);

	/* unprotect the pool descriptor (debug version only) */
	RANGE_RW(plp->addr + sizeof (struct pool_hdr), LOG_FORMAT_DATA_ALIGN);

	/* write the metadata */
	plp->write_offset = htole64(new_write_offset);

	/* persist the metadata */
	if (plp->is_pmem)
		pmem_persist(&plp->write_offset, sizeof (plp->write_offset));
	else
		pmem_msync(&plp->write_offset, sizeof (plp->write_offset));

	/* set the write-protection again (debug version only) */
	RANGE_RO(plp->addr + sizeof (struct pool_hdr), LOG_FORMAT_DATA_ALIGN);
}
/*
 * pmemobj_descr_create -- (internal) create obj pool descriptor
 */
static int
pmemobj_descr_create(PMEMobjpool *pop, const char *layout, size_t poolsize)
{
	LOG(3, "pop %p layout %s poolsize %zu", pop, layout, poolsize);

	ASSERTeq(poolsize % Pagesize, 0);

	/* opaque info lives at the beginning of mapped memory pool */
	void *dscp = (void *)((uintptr_t)(&pop->hdr) +
				sizeof (struct pool_hdr));

	/* create the persistent part of pool's descriptor */
	memset(dscp, 0, OBJ_DSC_P_SIZE);
	if (layout)
		strncpy(pop->layout, layout, PMEMOBJ_MAX_LAYOUT - 1);

	/* initialize run_id, it will be incremented later */
	pop->run_id = 0;
	pmem_msync(&pop->run_id, sizeof (pop->run_id));

	pop->lanes_offset = OBJ_LANES_OFFSET;
	pop->nlanes = OBJ_NLANES;

	/* zero all lanes */
	void *lanes_layout = (void *)((uintptr_t)pop + pop->lanes_offset);
	memset(lanes_layout, 0, pop->nlanes * sizeof (struct lane_layout));
	pmem_msync(lanes_layout, pop->nlanes * sizeof (struct lane_layout));

	/* initialization of the obj_store */
	pop->obj_store_offset = pop->lanes_offset +
		pop->nlanes * sizeof (struct lane_layout);
	pop->obj_store_size = (PMEMOBJ_NUM_OID_TYPES + 1) *
		sizeof (struct object_store_item); /* + 1 - for root object */
	void *store = (void *)((uintptr_t)pop + pop->obj_store_offset);
	memset(store, 0, pop->obj_store_size);
	pmem_msync(store, pop->obj_store_size);

	pop->heap_offset = pop->obj_store_offset + pop->obj_store_size;
	pop->heap_offset = (pop->heap_offset + Pagesize - 1) & ~(Pagesize - 1);
	pop->heap_size = poolsize - pop->heap_offset;

	/* initialize heap prior to storing the checksum */
	if ((errno = heap_init(pop)) != 0) {
		ERR("!heap_init");
		return -1;
	}

	util_checksum(dscp, OBJ_DSC_P_SIZE, &pop->checksum, 1);

	/* store the persistent part of pool's descriptor (2kB) */
	pmem_msync(dscp, OBJ_DSC_P_SIZE);

	return 0;
}
/*
 * pmemblk_descr_create -- (internal) create block memory pool descriptor
 */
static int
pmemblk_descr_create(PMEMblkpool *pbp, uint32_t bsize, int zeroed)
{
	LOG(3, "pbp %p bsize %u zeroed %d", pbp, bsize, zeroed);

	/* create the required metadata */
	pbp->bsize = htole32(bsize);
	pmem_msync(&pbp->bsize, sizeof (bsize));

	pbp->is_zeroed = zeroed;
	pmem_msync(&pbp->is_zeroed, sizeof (pbp->is_zeroed));

	return 0;
}
/*
 * update_replicas_linkage -- (internal) update uuids linking replicas
 */
static int
update_replicas_linkage(struct pool_set *set, unsigned repn)
{
	struct pool_replica *rep = REP(set, repn);
	struct pool_replica *prev_r = REP(set, repn - 1);
	struct pool_replica *next_r = REP(set, repn + 1);

	/* set uuids in the current replica */
	for (unsigned p = 0; p < rep->nparts; ++p) {
		struct pool_hdr *hdrp = HDR(rep, p);
		memcpy(hdrp->prev_repl_uuid, PART(prev_r, 0).uuid,
				POOL_HDR_UUID_LEN);
		memcpy(hdrp->next_repl_uuid, PART(next_r, 0).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 1);

		/* store pool's header */
		pmem_msync(hdrp, sizeof(*hdrp));
	}

	/* set uuids in the previous replica */
	for (unsigned p = 0; p < prev_r->nparts; ++p) {
		struct pool_hdr *prev_hdrp = HDR(prev_r, p);
		memcpy(prev_hdrp->next_repl_uuid, PART(rep, 0).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(prev_hdrp, sizeof(*prev_hdrp),
				&prev_hdrp->checksum, 1);

		/* store pool's header */
		pmem_msync(prev_hdrp, sizeof(*prev_hdrp));
	}

	/* set uuids in the next replica */
	for (unsigned p = 0; p < next_r->nparts; ++p) {
		struct pool_hdr *next_hdrp = HDR(next_r, p);
		memcpy(next_hdrp->prev_repl_uuid, PART(rep, 0).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(next_hdrp, sizeof(*next_hdrp),
				&next_hdrp->checksum, 1);

		/* store pool's header */
		pmem_msync(next_hdrp, sizeof(*next_hdrp));
	}

	return 0;
}
int
main(int argc, char *argv[])
{
	char *pmemaddr;
	size_t mapped_len;
	int is_pmem;

	/* create a pmem file and memory map it */
	if ((pmemaddr = pmem_map_file(PATH, PMEM_LEN, PMEM_FILE_CREATE,
			0666, &mapped_len, &is_pmem)) == NULL) {
		perror("pmem_map_file");
		exit(1);
	}

	/* store a string to the persistent memory */
	strcpy(pmemaddr, "hello, persistent memory");

	/* flush above strcpy to persistence */
	if (is_pmem)
		pmem_persist(pmemaddr, mapped_len);
	else
		pmem_msync(pmemaddr, mapped_len);

	/*
	 * Delete the mappings. The region is also
	 * automatically unmapped when the process is
	 * terminated.
	 */
	pmem_unmap(pmemaddr, mapped_len);
}
/*
 * libc_memset_msync -- perform operation using libc memset() function
 * followed by pmem_msync().
 */
static int
libc_memset_msync(void *dest, int c, size_t len)
{
	memset(dest, c, len);

	return pmem_msync(dest, len);
}
/*
 * pmemlog_rewind -- discard all data, resetting a log memory pool to empty
 */
void
pmemlog_rewind(PMEMlogpool *plp)
{
	LOG(3, "plp %p", plp);

	if (plp->rdonly) {
		ERR("can't rewind read-only log");
		errno = EROFS;
		return;
	}

	if ((errno = pthread_rwlock_wrlock(plp->rwlockp))) {
		ERR("!pthread_rwlock_wrlock");
		return;
	}

	/* unprotect the pool descriptor (debug version only) */
	RANGE_RW(plp->addr + sizeof (struct pool_hdr), LOG_FORMAT_DATA_ALIGN);

	plp->write_offset = plp->start_offset;
	if (plp->is_pmem)
		pmem_persist(&plp->write_offset, sizeof (uint64_t));
	else
		pmem_msync(&plp->write_offset, sizeof (uint64_t));

	/* set the write-protection again (debug version only) */
	RANGE_RO(plp->addr + sizeof (struct pool_hdr), LOG_FORMAT_DATA_ALIGN);

	if ((errno = pthread_rwlock_unlock(plp->rwlockp)))
		ERR("!pthread_rwlock_unlock");
}
static void *
obj_memset(void *ctx, void *ptr, int c, size_t sz)
{
	memset(ptr, c, sz);
	UT_ASSERTeq(pmem_msync(ptr, sz), 0);
	return ptr;
}
/*
 * os_range_deep_common -- call msync for ranges not backed by Device DAX
 */
int
os_range_deep_common(uintptr_t addr, size_t len)
{
	LOG(3, "os_range_deep_common addr %p len %lu", addr, len);

	return pmem_msync((void *)addr, len);
}
/*
 * rpmem_memcpy_msync -- memcpy and msync
 */
static void *
rpmem_memcpy_msync(void *pmemdest, const void *src, size_t len)
{
	void *ret = pmem_memcpy(pmemdest, src, len, PMEM_F_MEM_NOFLUSH);
	pmem_msync(pmemdest, len);

	return ret;
}
/*
 * os_part_deep_common -- common function to handle both
 * deep_persist and deep_drain part flush cases.
 */
int
os_part_deep_common(struct pool_replica *rep, unsigned partidx, void *addr,
		size_t len, int flush)
{
	LOG(3, "part %p part %d addr %p len %lu flush %d",
		rep, partidx, addr, len, flush);

	if (!rep->is_pmem) {
		/*
		 * In case of part on non-pmem call msync on the range
		 * to deep flush the data. Deep drain is empty as all
		 * data is msynced to persistence.
		 */
		if (!flush)
			return 0;

		if (pmem_msync(addr, len)) {
			LOG(1, "pmem_msync(%p, %lu)", addr, len);
			return -1;
		}
		return 0;
	}

	/* Call deep flush if it was requested */
	if (flush) {
		LOG(15, "pmem_deep_flush addr %p, len %lu", addr, len);
		pmem_deep_flush(addr, len);
	}

	/*
	 * Before deep drain call normal drain to ensure that data
	 * is at least in WPQ.
	 */
	pmem_drain();

	/*
	 * For deep_drain on normal pmem it is enough to
	 * call msync on one page.
	 */
	if (pmem_msync(addr, MIN(Pagesize, len))) {
		LOG(1, "pmem_msync(%p, %lu)", addr, len);
		return -1;
	}
	return 0;
}
/*
 * nopmem_memset_persist -- (internal) memset followed by an msync
 */
static void *
nopmem_memset_persist(void *dest, int c, size_t len)
{
	LOG(15, "dest %p c '%c' len %zu", dest, c, len);

	memset(dest, c, len);
	pmem_msync(dest, len);

	return dest;
}
/*
 * nopmem_memcpy_persist -- (internal) memcpy followed by an msync
 */
static void *
nopmem_memcpy_persist(void *dest, const void *src, size_t len)
{
	LOG(15, "dest %p src %p len %zu", dest, src, len);

	memcpy(dest, src, len);
	pmem_msync(dest, len);

	return dest;
}
/*
 * nssync -- (internal) flush changes made to a namespace range
 *
 * This is used in conjunction with the addresses handed out by
 * nsmap() above. There's no need to sync things written via
 * nswrite() since those changes are flushed each time nswrite()
 * is called.
 *
 * This routine is provided to btt_init() to allow the btt module to
 * do I/O on the memory pool containing the BTT layout.
 */
static void
nssync(void *ns, unsigned lane, void *addr, size_t len)
{
	struct pmemblk *pbp = (struct pmemblk *)ns;

	LOG(12, "pbp %p lane %u addr %p len %zu", pbp, lane, addr, len);

	if (pbp->is_pmem)
		pmem_persist(addr, len);
	else
		pmem_msync(addr, len);
}
/*
 * heap_init -- initializes the heap
 *
 * If successful, the function returns zero. Otherwise an error number is returned.
 */
int
heap_init(PMEMobjpool *pop)
{
	if (pop->heap_size < HEAP_MIN_SIZE)
		return EINVAL;

	struct heap_layout *layout = heap_get_layout(pop);
	heap_write_header(&layout->header, pop->heap_size);
	pmem_msync(&layout->header, sizeof (struct heap_header));

	return 0;
}
/*
 * update_parts_linkage -- (internal) set uuids linking recreated parts within
 *                         a replica
 */
static int
update_parts_linkage(struct pool_set *set, unsigned repn,
		struct poolset_health_status *set_hs)
{
	struct pool_replica *rep = REP(set, repn);
	for (unsigned p = 0; p < rep->nparts; ++p) {
		struct pool_hdr *hdrp = HDR(rep, p);
		struct pool_hdr *prev_hdrp = HDR(rep, p - 1);
		struct pool_hdr *next_hdrp = HDR(rep, p + 1);

		/* set uuids in the current part */
		memcpy(hdrp->prev_part_uuid, PART(rep, p - 1).uuid,
				POOL_HDR_UUID_LEN);
		memcpy(hdrp->next_part_uuid, PART(rep, p + 1).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 1);

		/* set uuids in the previous part */
		memcpy(prev_hdrp->next_part_uuid, PART(rep, p).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(prev_hdrp, sizeof(*prev_hdrp),
				&prev_hdrp->checksum, 1);

		/* set uuids in the next part */
		memcpy(next_hdrp->prev_part_uuid, PART(rep, p).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(next_hdrp, sizeof(*next_hdrp),
				&next_hdrp->checksum, 1);

		/* store pool's header */
		pmem_msync(hdrp, sizeof(*hdrp));
		pmem_msync(prev_hdrp, sizeof(*prev_hdrp));
		pmem_msync(next_hdrp, sizeof(*next_hdrp));
	}
	return 0;
}
int
main(int argc, char *argv[])
{
	int srcfd;
	char buf[BUF_LEN];
	char *pmemaddr;
	size_t mapped_len;
	int is_pmem;
	int cc;

	if (argc != 3) {
		fprintf(stderr, "usage: %s src-file dst-file\n", argv[0]);
		exit(1);
	}

	/* open src-file */
	if ((srcfd = open(argv[1], O_RDONLY)) < 0) {
		perror(argv[1]);
		exit(1);
	}

	/* create a pmem file and memory map it */
	if ((pmemaddr = pmem_map_file(argv[2], BUF_LEN,
			PMEM_FILE_CREATE|PMEM_FILE_EXCL,
			0666, &mapped_len, &is_pmem)) == NULL) {
		perror("pmem_map_file");
		exit(1);
	}

	/* read up to BUF_LEN from srcfd */
	if ((cc = read(srcfd, buf, BUF_LEN)) < 0) {
		pmem_unmap(pmemaddr, mapped_len);
		perror("read");
		exit(1);
	}

	/* write it to the pmem */
	if (is_pmem) {
		pmem_memcpy_persist(pmemaddr, buf, cc);
	} else {
		memcpy(pmemaddr, buf, cc);
		pmem_msync(pmemaddr, cc);
	}

	close(srcfd);
	pmem_unmap(pmemaddr, mapped_len);

	exit(0);
}
/*
 * nswrite -- (internal) write data to the namespace encapsulating the BTT
 *
 * This routine is provided to btt_init() to allow the btt module to
 * do I/O on the memory pool containing the BTT layout.
 */
static int
nswrite(void *ns, int lane, const void *buf, size_t count, off_t off)
{
	struct pmemblk *pbp = (struct pmemblk *)ns;

	LOG(13, "pbp %p lane %d count %zu off %lld",
			pbp, lane, count, (long long)off);

	if (off + count > pbp->datasize) {
		ERR("offset + count (%lld) past end of data area (%zu)",
				(long long)off + count, pbp->datasize);
		errno = EINVAL;
		return -1;
	}

	void *dest = pbp->data + off;

#ifdef DEBUG
	/* grab debug write lock */
	if ((errno = pthread_mutex_lock(&pbp->write_lock))) {
		ERR("!pthread_mutex_lock");
		return -1;
	}
#endif

	/* unprotect the memory (debug version only) */
	RANGE_RW(dest, count);

	if (pbp->is_pmem)
		pmem_memcpy_nodrain(dest, buf, count);
	else
		memcpy(dest, buf, count);

	/* protect the memory again (debug version only) */
	RANGE_RO(dest, count);

#ifdef DEBUG
	/* release debug write lock */
	if ((errno = pthread_mutex_unlock(&pbp->write_lock)))
		ERR("!pthread_mutex_unlock");
#endif

	if (pbp->is_pmem)
		pmem_drain();
	else
		pmem_msync(dest, count);

	return 0;
}
int
main(int argc, char *argv[])
{
	int fd;
	struct stat stbuf;
	char *dest;

	START(argc, argv, "pmem_valgr_simple");

	if (argc != 4)
		FATAL("usage: %s file offset length", argv[0]);

	fd = OPEN(argv[1], O_RDWR);
	int dest_off = atoi(argv[2]);
	size_t bytes = strtoul(argv[3], NULL, 0);

	FSTAT(fd, &stbuf);

	dest = pmem_map(fd);
	if (dest == NULL)
		FATAL("!Could not mmap %s\n", argv[1]);

	/* these will not be made persistent */
	*(int *)dest = 4;

	/* this will be made persistent */
	uint64_t *tmp64dst = (void *)((uintptr_t)dest + 4096);
	*tmp64dst = 50;

	if (pmem_is_pmem(dest, sizeof (*tmp64dst))) {
		pmem_persist(tmp64dst, sizeof (*tmp64dst));
	} else {
		pmem_msync(tmp64dst, sizeof (*tmp64dst));
	}

	uint16_t *tmp16dst = (void *)((uintptr_t)dest + 1024);
	*tmp16dst = 21;
	/* will appear as flushed in valgrind log */
	pmem_flush(tmp16dst, sizeof (*tmp16dst));

	/* shows strange behavior of memset in some cases */
	memset(dest + dest_off, 0, bytes);

	pmem_unmap(dest, stbuf.st_size);

	CLOSE(fd);

	DONE(NULL);
}
/*
 * update_poolset_uuids -- (internal) update poolset uuid in recreated parts
 */
static int
update_poolset_uuids(struct pool_set *set, unsigned repn,
		struct poolset_health_status *set_hs)
{
	struct pool_replica *rep = REP(set, repn);
	for (unsigned p = 0; p < rep->nparts; ++p) {
		struct pool_hdr *hdrp = HDR(rep, p);
		memcpy(hdrp->poolset_uuid, set->uuid, POOL_HDR_UUID_LEN);
		util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 1);

		/* store pool's header */
		pmem_msync(hdrp, sizeof(*hdrp));
	}
	return 0;
}
/*
 * copy_data_to_broken_parts -- (internal) copy data to all parts created
 *                              in place of the broken ones
 */
static int
copy_data_to_broken_parts(struct pool_set *set, unsigned healthy_replica,
		unsigned flags, struct poolset_health_status *set_hs)
{
	size_t poolsize = replica_get_pool_size(set, healthy_replica);

	for (unsigned r = 0; r < set_hs->nreplicas; ++r) {
		/* skip unbroken and consistent replicas */
		if (replica_is_replica_healthy(r, set_hs))
			continue;

		struct pool_replica *rep = REP(set, r);
		struct pool_replica *rep_h = REP(set, healthy_replica);

		for (unsigned p = 0; p < rep->nparts; ++p) {
			/* skip unbroken parts from consistent replicas */
			if (!replica_is_part_broken(r, p, set_hs) &&
					replica_is_replica_consistent(r, set_hs))
				continue;

			const struct pool_set_part *part = &rep->part[p];

			size_t off = replica_get_part_data_offset(set, r, p);
			size_t len = replica_get_part_data_len(set, r, p);

			/* do not allow copying too much data */
			if (off >= poolsize)
				continue;

			if (off + len > poolsize)
				len = poolsize - off;

			void *src_addr = ADDR_SUM(rep_h->part[0].addr, off);

			/* First part of replica is mapped with header */
			size_t fpoff = (p == 0) ? POOL_HDR_SIZE : 0;

			/* copy all data */
			if (!is_dry_run(flags)) {
				memcpy(ADDR_SUM(part->addr, fpoff),
						src_addr, len);
				pmem_msync(ADDR_SUM(part->addr, fpoff), len);
			}
		}
	}
	return 0;
}
/*
 * nswrite -- (internal) write data to the namespace encapsulating the BTT
 *
 * This routine is provided to btt_init() to allow the btt module to
 * do I/O on the memory pool containing the BTT layout.
 */
static int
nswrite(void *ns, unsigned lane, const void *buf, size_t count,
	uint64_t off)
{
	struct pmemblk *pbp = (struct pmemblk *)ns;

	LOG(13, "pbp %p lane %u count %zu off %ju", pbp, lane, count, off);

	if (off + count > pbp->datasize) {
		ERR("offset + count (%zu) past end of data area (%zu)",
				off + count, pbp->datasize);
		errno = EINVAL;
		return -1;
	}

	void *dest = (char *)pbp->data + off;

#ifdef DEBUG
	/* grab debug write lock */
	util_mutex_lock(&pbp->write_lock);
#endif

	/* unprotect the memory (debug version only) */
	RANGE_RW(dest, count);

	if (pbp->is_pmem)
		pmem_memcpy_nodrain(dest, buf, count);
	else
		memcpy(dest, buf, count);

	/* protect the memory again (debug version only) */
	RANGE_RO(dest, count);

#ifdef DEBUG
	/* release debug write lock */
	util_mutex_unlock(&pbp->write_lock);
#endif

	if (pbp->is_pmem)
		pmem_drain();
	else
		pmem_msync(dest, count);

	return 0;
}
int
main(int argc, char *argv[])
{
	size_t mapped_len;
	char *dest;
	int is_pmem;

	START(argc, argv, "pmem_valgr_simple");

	if (argc != 4)
		UT_FATAL("usage: %s file offset length", argv[0]);

	int dest_off = atoi(argv[2]);
	size_t bytes = strtoul(argv[3], NULL, 0);

	dest = pmem_map_file(argv[1], 0, 0, 0, &mapped_len, &is_pmem);
	if (dest == NULL)
		UT_FATAL("!Could not mmap %s\n", argv[1]);

	/* these will not be made persistent */
	*(int *)dest = 4;

	/* this will be made persistent */
	uint64_t *tmp64dst = (void *)((uintptr_t)dest + 4096);
	*tmp64dst = 50;

	if (is_pmem) {
		pmem_persist(tmp64dst, sizeof(*tmp64dst));
	} else {
		pmem_msync(tmp64dst, sizeof(*tmp64dst));
	}

	uint16_t *tmp16dst = (void *)((uintptr_t)dest + 1024);
	*tmp16dst = 21;
	/* will appear as flushed/fenced in valgrind log */
	pmem_flush(tmp16dst, sizeof(*tmp16dst));

	/* shows strange behavior of memset in some cases */
	memset(dest + dest_off, 0, bytes);

	pmem_unmap(dest, mapped_len);

	DONE(NULL);
}
void StorageManager::Sync(BackendType type, void *address, size_t length) {
  switch (type) {
    case BACKEND_TYPE_MM: {
      // Nothing to do here
    } break;

    case BACKEND_TYPE_FILE: {
      // flush writes for persistence
      if (is_pmem)
        pmem_persist(address, length);
      else
        pmem_msync(address, length);
    } break;

    case BACKEND_TYPE_INVALID:
    default: {
      // Nothing to do here
    } break;
  }
}
/*
 * do_check -- check the mapping
 */
static void
do_check(int fd, void *addr, size_t mlen)
{
	/* arrange to catch SEGV */
	struct sigaction v;
	sigemptyset(&v.sa_mask);
	v.sa_flags = 0;
	v.sa_handler = signal_handler;
	SIGACTION(SIGSEGV, &v, NULL);

	char pat[CHECK_BYTES];
	char buf[CHECK_BYTES];

	/* write some pattern to the file */
	memset(pat, 0x5A, CHECK_BYTES);
	WRITE(fd, pat, CHECK_BYTES);

	if (memcmp(pat, addr, CHECK_BYTES))
		UT_OUT("first %d bytes do not match", CHECK_BYTES);

	/* fill up mapped region with new pattern */
	memset(pat, 0xA5, CHECK_BYTES);
	memcpy(addr, pat, CHECK_BYTES);

	UT_ASSERTeq(pmem_msync(addr, CHECK_BYTES), 0);

	UT_ASSERTeq(pmem_unmap(addr, mlen), 0);

	if (!ut_sigsetjmp(Jmp)) {
		/* same memcpy from above should now fail */
		memcpy(addr, pat, CHECK_BYTES);
	} else {
		UT_OUT("unmap successful");
	}

	LSEEK(fd, (os_off_t)0, SEEK_SET);
	if (READ(fd, buf, CHECK_BYTES) == CHECK_BYTES) {
		if (memcmp(pat, buf, CHECK_BYTES))
			UT_OUT("first %d bytes do not match", CHECK_BYTES);
	}
}
/*
 * pool_set_part_copy -- make a copy of the poolset part
 */
int
pool_set_part_copy(struct pool_set_part *dpart, struct pool_set_part *spart)
{
	LOG(3, "dpart %p spart %p", dpart, spart);

	int result = 0;

	util_stat_t stat_buf;
	if (util_stat(spart->path, &stat_buf)) {
		ERR("!util_stat");
		return -1;
	}

	size_t smapped = 0;
	void *saddr = pmem_map_file(spart->path, 0, 0, S_IREAD, &smapped,
			NULL);
	if (!saddr)
		return -1;

	size_t dmapped = 0;
	int is_pmem;
	void *daddr = pmem_map_file(dpart->path, dpart->filesize,
			PMEM_FILE_CREATE | PMEM_FILE_EXCL,
			stat_buf.st_mode, &dmapped, &is_pmem);
	if (!daddr) {
		result = -1;
		goto out_sunmap;
	}

	if (is_pmem) {
		pmem_memcpy_persist(daddr, saddr, smapped);
	} else {
		memcpy(daddr, saddr, smapped);
		pmem_msync(daddr, smapped);
	}

	pmem_unmap(daddr, dmapped);

out_sunmap:
	pmem_unmap(saddr, smapped);
	return result;
}
/*
 * do_copy_to_non_pmem -- copy to a non-pmem memory mapped file
 */
void
do_copy_to_non_pmem(char *addr, int srcfd, off_t len)
{
	char *startaddr = addr;
	char buf[BUF_LEN];
	int cc;

	/* copy the file, saving the last flush step to the end */
	while ((cc = read(srcfd, buf, BUF_LEN)) > 0) {
		memcpy(addr, buf, cc);
		addr += cc;
	}

	if (cc < 0) {
		perror("read");
		exit(1);
	}

	/* flush it */
	if (pmem_msync(startaddr, len) < 0) {
		perror("pmem_msync");
		exit(1);
	}
}
/*
 * copy_data_to_broken_parts -- (internal) copy data to all parts created
 *                              in place of the broken ones
 */
static int
copy_data_to_broken_parts(struct pool_set *set, unsigned healthy_replica,
		unsigned flags, struct poolset_health_status *set_hs)
{
	/* get pool size from healthy replica */
	size_t poolsize = set->poolsize;

	for (unsigned r = 0; r < set_hs->nreplicas; ++r) {
		/* skip unbroken and consistent replicas */
		if (replica_is_replica_healthy(r, set_hs))
			continue;

		struct pool_replica *rep = REP(set, r);
		struct pool_replica *rep_h = REP(set, healthy_replica);

		for (unsigned p = 0; p < rep->nparts; ++p) {
			/* skip unbroken parts from consistent replicas */
			if (!replica_is_part_broken(r, p, set_hs) &&
					replica_is_replica_consistent(r, set_hs))
				continue;

			const struct pool_set_part *part = &rep->part[p];

			size_t off = replica_get_part_data_offset(set, r, p);
			size_t len = replica_get_part_data_len(set, r, p);

			if (rep->remote)
				len = poolsize - off;

			/* do not allow copying too much data */
			if (off >= poolsize)
				continue;

			/*
			 * First part of replica is mapped
			 * with header
			 */
			size_t fpoff = (p == 0) ? POOL_HDR_SIZE : 0;
			void *dst_addr = ADDR_SUM(part->addr, fpoff);

			if (rep->remote) {
				int ret = Rpmem_persist(rep->remote->rpp,
						off - POOL_HDR_SIZE, len, 0);
				if (ret) {
					LOG(1, "Copying data to remote node "
						"failed -- '%s' on '%s'",
						rep->remote->pool_desc,
						rep->remote->node_addr);
					return -1;
				}
			} else if (rep_h->remote) {
				int ret = Rpmem_read(rep_h->remote->rpp,
						dst_addr, off - POOL_HDR_SIZE,
						len);
				if (ret) {
					LOG(1, "Reading data from remote node "
						"failed -- '%s' on '%s'",
						rep_h->remote->pool_desc,
						rep_h->remote->node_addr);
					return -1;
				}
			} else {
				if (off + len > poolsize)
					len = poolsize - off;

				void *src_addr =
					ADDR_SUM(rep_h->part[0].addr, off);

				/* copy all data */
				memcpy(dst_addr, src_addr, len);
				pmem_msync(dst_addr, len);
			}
		}
	}
	return 0;
}
int
main(int argc, char *argv[])
{
	int srcfd;
	int dstfd;
	char buf[BUF_LEN];
	char *pmemaddr;
	int is_pmem;
	int cc;

	if (argc != 3) {
		fprintf(stderr, "usage: %s src-file dst-file\n", argv[0]);
		exit(1);
	}

	/* open src-file */
	if ((srcfd = open(argv[1], O_RDONLY)) < 0) {
		perror(argv[1]);
		exit(1);
	}

	/* create a pmem file */
	if ((dstfd = open(argv[2], O_CREAT|O_EXCL|O_RDWR, 0666)) < 0) {
		perror(argv[2]);
		exit(1);
	}

	/* allocate the pmem */
	if ((errno = posix_fallocate(dstfd, 0, BUF_LEN)) != 0) {
		perror("posix_fallocate");
		exit(1);
	}

	/* memory map it */
	if ((pmemaddr = pmem_map(dstfd)) == NULL) {
		perror("pmem_map");
		exit(1);
	}
	close(dstfd);

	/* determine if range is true pmem */
	is_pmem = pmem_is_pmem(pmemaddr, BUF_LEN);

	/* read up to BUF_LEN from srcfd */
	if ((cc = read(srcfd, buf, BUF_LEN)) < 0) {
		perror("read");
		exit(1);
	}

	/* write it to the pmem */
	if (is_pmem) {
		pmem_memcpy(pmemaddr, buf, cc);
	} else {
		memcpy(pmemaddr, buf, cc);
		pmem_msync(pmemaddr, cc);
	}

	close(srcfd);

	exit(0);
}