/*
 * write_consistent -- (internal) write data in a consistent manner
 */
static void
write_consistent(struct three_field *structp)
{
	structp->first_field = 1;
	structp->second_field = 1;
	structp->third_field = 1;
	pmem_persist(&structp->first_field, sizeof(int) * 3);
	structp->flag = 1;
	pmem_persist(&structp->flag, sizeof(structp->flag));
}
/*
 * pmemalloc_coalesce_free -- find adjacent free blocks and coalesce them
 *
 * Scan the pmem pool for runs of adjacent FREE clumps and merge each run
 * into a single, larger FREE clump.
 *
 * Internal support routine, used during recovery and by pmemalloc_free().
 */
static void
pmemalloc_coalesce_free(void *pmp)
{
	struct clump *clp;
	struct pool_header *phdr;
	struct clump *firstfree;
	struct clump *lastfree;
	size_t csize;

	DEBUG("pmp=0x%lx", pmp);

	firstfree = lastfree = NULL;
	csize = 0;
	clp = PMEM(pmp, (struct clump *)PMEM_CLUMP_OFFSET);
	phdr = PMEM(pmp, (struct pool_header *)PMEM_HDR_OFFSET);

	pthread_mutex_lock(&phdr->pool_lock);

	while (clp->size) {
		size_t sz = clp->size & ~PMEM_STATE_MASK;
		int state = clp->size & PMEM_STATE_MASK;

		DEBUG("[0x%lx]clump size %lx state %d",
				OFF(pmp, clp), sz, state);

		if (state == PMEM_STATE_FREE) {
			if (firstfree == NULL)
				firstfree = clp;
			else
				lastfree = clp;
			csize += sz;
		} else if (firstfree != NULL && lastfree != NULL) {
			DEBUG("coalesced size 0x%lx", csize);
			firstfree->size = csize | PMEM_STATE_FREE;
			pmem_persist(firstfree, sizeof(*firstfree), 0);
			firstfree = lastfree = NULL;
			csize = 0;
		} else {
			firstfree = lastfree = NULL;
			csize = 0;
		}

		clp = (struct clump *)((uintptr_t)clp + sz);
		DEBUG("next clp %lx, offset 0x%lx", clp, OFF(pmp, clp));
	}
	if (firstfree != NULL && lastfree != NULL) {
		DEBUG("coalesced size 0x%lx", csize);
		DEBUG("firstfree 0x%lx next clp after firstfree will be 0x%lx",
				firstfree, (uintptr_t)firstfree + csize);
		firstfree->size = csize | PMEM_STATE_FREE;
		pmem_persist(firstfree, sizeof(*firstfree), 0);
	}

	pthread_mutex_unlock(&phdr->pool_lock);
}
/*
 * pmemalloc_activate -- atomically persist memory, mark in-use, store pointers
 *
 * Inputs:
 *	pmp -- a pmp as returned by pmemalloc_init()
 *
 *	ptr_ -- memory to be persisted, as returned by pmemalloc_reserve()
 */
void
pmemalloc_activate(void *pmp, void *ptr_)
{
	struct clump *clp;
	size_t sz;
	int i;
	struct pool_header *phdr;

	DEBUG("pmp=%lx, ptr_=%lx", pmp, ptr_);

	clp = PMEM(pmp, (struct clump *)((uintptr_t)ptr_ - PMEM_CHUNK_SIZE));
	phdr = PMEM(pmp, (struct pool_header *)PMEM_HDR_OFFSET);

	pthread_mutex_lock(&phdr->activation_lock);

	ASSERTeq(clp->size & PMEM_STATE_MASK, PMEM_STATE_RESERVED);

	DEBUG("[0x%lx] clump on: 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx",
			OFF(pmp, clp),
			clp->on[0].off, clp->on[0].ptr_,
			clp->on[1].off, clp->on[1].ptr_,
			clp->on[2].off, clp->on[2].ptr_);

	sz = clp->size & ~PMEM_STATE_MASK;

	/*
	 * order here is important:
	 * 1. persist *ptr_
	 * 2. set state to ACTIVATING
	 * 3. persist *clp (now we're committed to progressing to STATE_ACTIVE)
	 * 4. execute "on" list, persisting each one
	 * 5. clear out "on" list, last to first
	 * 6. set state to ACTIVE
	 * 7. persist *clp
	 */
	pmem_persist(PMEM(pmp, ptr_), sz - PMEM_CHUNK_SIZE, 0);
	clp->size = sz | PMEM_STATE_ACTIVATING;
	pmem_persist(clp, sizeof(*clp), 0);
	for (i = 0; i < PMEM_NUM_ON; i++)
		if (clp->on[i].off) {
			uintptr_t *dest =
				PMEM(pmp, (uintptr_t *)clp->on[i].off);
			*dest = (uintptr_t)clp->on[i].ptr_;
			pmem_persist(dest, sizeof(*dest), 0);
		} else
			break;
	for (i = PMEM_NUM_ON - 1; i >= 0; i--)
		clp->on[i].off = 0;
	pmem_persist(clp, sizeof(*clp), 0);
	clp->size = sz | PMEM_STATE_ACTIVE;
	pmem_persist(clp, sizeof(*clp), 0);

	pthread_mutex_unlock(&phdr->activation_lock);
}
/*
 * pmemlog_persist -- (internal) persist data, then metadata
 *
 * On entry, the write lock should be held.
 */
static void
pmemlog_persist(PMEMlogpool *plp, uint64_t new_write_offset)
{
	uint64_t old_write_offset = le64toh(plp->write_offset);
	size_t length = new_write_offset - old_write_offset;

	/* unprotect the log space range (debug version only) */
	RANGE_RW(plp->addr + old_write_offset, length);

	/* persist the data */
	if (plp->is_pmem)
		pmem_drain(); /* data already flushed */
	else
		pmem_msync(plp->addr + old_write_offset, length);

	/* protect the log space range (debug version only) */
	RANGE_RO(plp->addr + old_write_offset, length);

	/* unprotect the pool descriptor (debug version only) */
	RANGE_RW(plp->addr + sizeof (struct pool_hdr), LOG_FORMAT_DATA_ALIGN);

	/* write the metadata */
	plp->write_offset = htole64(new_write_offset);

	/* persist the metadata */
	if (plp->is_pmem)
		pmem_persist(&plp->write_offset, sizeof (plp->write_offset));
	else
		pmem_msync(&plp->write_offset, sizeof (plp->write_offset));

	/* set the write-protection again (debug version only) */
	RANGE_RO(plp->addr + sizeof (struct pool_hdr), LOG_FORMAT_DATA_ALIGN);
}
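pmemlog_persist() expects the caller to hold the write lock and to have already copied the new data into the log. A minimal sketch of such a caller follows; log_append_locked is a hypothetical name, and the end_offset field is an assumption (only write_offset and start_offset appear in the excerpts above). The real pmemlog_append() also handles read-only pools and error reporting.

/*
 * log_append_locked -- (sketch) copy data into the log, then persist
 *
 * Hypothetical helper, not from the sources above. Assumes plp->rwlockp is
 * already write-locked and that the header records an end_offset field.
 */
static int
log_append_locked(PMEMlogpool *plp, const void *buf, size_t count)
{
	uint64_t write_offset = le64toh(plp->write_offset);
	uint64_t end_offset = le64toh(plp->end_offset);	/* assumed field */

	if (write_offset + count > end_offset)
		return -1;	/* no room left in the log */

	char *data = plp->addr + write_offset;

	/* unprotect the range being written (debug version only) */
	RANGE_RW(data, count);

	if (plp->is_pmem)
		pmem_memcpy_nodrain(data, buf, count);	/* flushed, not drained */
	else
		memcpy(data, buf, count);

	/* data-then-metadata ordering happens inside pmemlog_persist() */
	pmemlog_persist(plp, write_offset + count);

	return 0;
}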
/*
 * pmemlog_rewind -- discard all data, resetting a log memory pool to empty
 */
void
pmemlog_rewind(PMEMlogpool *plp)
{
	LOG(3, "plp %p", plp);

	if (plp->rdonly) {
		ERR("can't rewind read-only log");
		errno = EROFS;
		return;
	}

	if ((errno = pthread_rwlock_wrlock(plp->rwlockp))) {
		ERR("!pthread_rwlock_wrlock");
		return;
	}

	/* unprotect the pool descriptor (debug version only) */
	RANGE_RW(plp->addr + sizeof (struct pool_hdr), LOG_FORMAT_DATA_ALIGN);

	plp->write_offset = plp->start_offset;
	if (plp->is_pmem)
		pmem_persist(&plp->write_offset, sizeof (uint64_t));
	else
		pmem_msync(&plp->write_offset, sizeof (uint64_t));

	/* set the write-protection again (debug version only) */
	RANGE_RO(plp->addr + sizeof (struct pool_hdr), LOG_FORMAT_DATA_ALIGN);

	if ((errno = pthread_rwlock_unlock(plp->rwlockp)))
		ERR("!pthread_rwlock_unlock");
}
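A minimal caller-side sketch of the public libpmemlog API, assuming the standard pmemlog_create()/pmemlog_append()/pmemlog_rewind()/pmemlog_close() functions; log_roundtrip is an illustrative name and the pool path is a placeholder. Internally, pmemlog_append() uses the data-then-metadata ordering of pmemlog_persist() above, and pmemlog_rewind() resets write_offset as shown.

#include <stdio.h>
#include <string.h>
#include <libpmemlog.h>

/* sketch: append one record, then discard everything with pmemlog_rewind() */
int
log_roundtrip(const char *path)
{
	PMEMlogpool *plp = pmemlog_create(path, PMEMLOG_MIN_POOL, 0666);

	if (plp == NULL)
		return -1;

	const char msg[] = "first record";

	/* pmemlog_append() persists the data, then the write offset */
	if (pmemlog_append(plp, msg, sizeof(msg)) < 0) {
		perror("pmemlog_append");
		pmemlog_close(plp);
		return -1;
	}

	/* drop all records; write_offset goes back to start_offset */
	pmemlog_rewind(plp);

	pmemlog_close(plp);
	return 0;
}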
int
main(int argc, char *argv[])
{
	char *pmemaddr;
	size_t mapped_len;
	int is_pmem;

	/* create a pmem file and memory map it */
	if ((pmemaddr = pmem_map_file(PATH, PMEM_LEN, PMEM_FILE_CREATE,
				0666, &mapped_len, &is_pmem)) == NULL) {
		perror("pmem_map_file");
		exit(1);
	}

	/* store a string to the persistent memory */
	strcpy(pmemaddr, "hello, persistent memory");

	/* flush above strcpy to persistence */
	if (is_pmem)
		pmem_persist(pmemaddr, mapped_len);
	else
		pmem_msync(pmemaddr, mapped_len);

	/*
	 * Delete the mappings. The region is also
	 * automatically unmapped when the process is
	 * terminated.
	 */
	pmem_unmap(pmemaddr, mapped_len);
}
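A complementary read-back sketch, not part of the original example: it maps the same file (len 0 and no flags open an existing file with pmem_map_file()) and prints the string persisted above. PATH is the same assumed macro, and read_it_back is an illustrative name.

/* sketch: re-open the existing pmem file and read the string back */
int
read_it_back(void)
{
	char *pmemaddr;
	size_t mapped_len;
	int is_pmem;

	/* open and map the existing pmem file */
	if ((pmemaddr = pmem_map_file(PATH, 0, 0, 0666,
				&mapped_len, &is_pmem)) == NULL) {
		perror("pmem_map_file");
		return 1;
	}

	/* the string was persisted before the writer unmapped */
	printf("%s\n", pmemaddr);

	pmem_unmap(pmemaddr, mapped_len);
	return 0;
}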
/*
 * write_inconsistent -- (internal) write data in an inconsistent manner
 */
static void
write_inconsistent(struct three_field *structp)
{
	structp->flag = 1;
	structp->first_field = 1;
	structp->second_field = 1;
	structp->third_field = 1;
	pmem_persist(structp, sizeof(*structp));
}
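A hedged sketch of a reader-side check (check_data_consistency is a hypothetical name, not from the sources above) illustrating why the two writers differ: write_consistent() persists the three fields before setting and persisting the flag, so a set flag implies valid fields after a crash; write_inconsistent() stores the flag first and persists everything at once, so the same check can fail.

/*
 * check_data_consistency -- (sketch) returns 0 if consistent, 1 if not
 */
static int
check_data_consistency(const struct three_field *structp)
{
	if (structp->flag == 0)
		return 0;	/* nothing claimed to be written yet */

	if (structp->first_field == 1 &&
	    structp->second_field == 1 &&
	    structp->third_field == 1)
		return 0;	/* flag set and all fields present */

	return 1;		/* flag set but fields missing: inconsistent */
}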
/*
 * libc_memcpy_persist -- copy using libc memcpy() function
 * followed by pmem_persist().
 */
static int
libc_memcpy_persist(void *dest, void *source, size_t len)
{
	memcpy(dest, source, len);
	pmem_persist(dest, len);

	return 0;
}
/*
 * libc_memset_persist -- perform operation using libc memset() function
 * followed by pmem_persist().
 */
static int
libc_memset_persist(void *dest, int c, size_t len)
{
	memset(dest, c, len);
	pmem_persist(dest, len);

	return 0;
}
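The two helpers above pair libc memcpy()/memset() with a separate pmem_persist() call. libpmem also provides combined routines, pmem_memcpy_persist() and pmem_memset_persist(), which perform the copy/set (typically with non-temporal stores) and take care of flushing in one call. A minimal sketch follows; the wrapper names are illustrative, only the pmem_* calls are the standard API.

/*
 * pmem_memcpy_persist_wrapper / pmem_memset_persist_wrapper -- (sketch)
 * same shape as the libc_* helpers, using the combined libpmem routines.
 */
static int
pmem_memcpy_persist_wrapper(void *dest, void *source, size_t len)
{
	pmem_memcpy_persist(dest, source, len);
	return 0;
}

static int
pmem_memset_persist_wrapper(void *dest, int c, size_t len)
{
	pmem_memset_persist(dest, c, len);
	return 0;
}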
/*
 * pmemalloc_onfree -- set assignments for when allocation gets freed
 *
 * Inputs:
 *	pmp -- The pmp as returned by pmemalloc_init() for the persistent
 *	       memory pool containing both the persistent memory chunk to
 *	       be returned and the persistent data structure used by the
 *	       calling program to track the allocated persistent memory.
 *
 *	ptr_ -- Relative pointer to the persistent memory to be returned
 *
 *	parentp -- Absolute pointer to the persistent relative pointer
 *	           used by the calling program to track the chunk of
 *	           persistent memory referenced by ptr_. The persistent
 *	           relative pointer must be within the same PM pool.
 *
 *	nptr_ -- The value to set in *parentp
 */
void
pmemalloc_onfree(void *pmp, void *ptr_, void **parentp, void *nptr_)
{
	struct clump *clp;
	int i;
	struct pool_header *phdr;

	DEBUG("pmp=0x%lx, ptr_=0x%lx, parentp_=0x%lx, nptr_=0x%lx",
			pmp, ptr_, parentp, nptr_);

	clp = PMEM(pmp, (struct clump *)((uintptr_t)ptr_ - PMEM_CHUNK_SIZE));
	phdr = PMEM(pmp, (struct pool_header *)PMEM_HDR_OFFSET);

	pthread_mutex_lock(&phdr->activation_lock);

	ASSERTeq(clp->size & PMEM_STATE_MASK, PMEM_STATE_ACTIVE);

	DEBUG("[0x%lx] clump on: 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx",
			OFF(pmp, clp),
			clp->on[0].off, clp->on[0].ptr_,
			clp->on[1].off, clp->on[1].ptr_,
			clp->on[2].off, clp->on[2].ptr_);

	for (i = 0; i < PMEM_NUM_ON; i++)
		if (clp->on[i].off == 0) {
			DEBUG("using on[%d], off 0x%lx", i, OFF(pmp, parentp));

			/*
			 * order here is important:
			 * 1. set ptr_
			 * 2. make ptr_ persistent
			 * 3. set off
			 * 4. make off persistent
			 */
			clp->on[i].ptr_ = nptr_;
			pmem_persist(clp, sizeof(*clp), 0);
			clp->on[i].off = OFF(pmp, parentp);
			pmem_persist(clp, sizeof(*clp), 0);

			pthread_mutex_unlock(&phdr->activation_lock);
			return;
		}

	FATAL("exceeded onfree limit (%d)", PMEM_NUM_ON);
}
/*
 * nssync -- (internal) flush changes made to a namespace range
 *
 * This is used in conjunction with the addresses handed out by
 * nsmap() above. There's no need to sync things written via
 * nswrite() since those changes are flushed each time nswrite()
 * is called.
 *
 * This routine is provided to btt_init() to allow the btt module to
 * do I/O on the memory pool containing the BTT layout.
 */
static void
nssync(void *ns, unsigned lane, void *addr, size_t len)
{
	struct pmemblk *pbp = (struct pmemblk *)ns;

	LOG(12, "pbp %p lane %u addr %p len %zu", pbp, lane, addr, len);

	if (pbp->is_pmem)
		pmem_persist(addr, len);
	else
		pmem_msync(addr, len);
}
int
main(int argc, char *argv[])
{
	int fd;
	struct stat stbuf;
	char *dest;

	START(argc, argv, "pmem_valgr_simple");

	if (argc != 4)
		FATAL("usage: %s file offset length", argv[0]);

	fd = OPEN(argv[1], O_RDWR);
	int dest_off = atoi(argv[2]);
	size_t bytes = strtoul(argv[3], NULL, 0);

	FSTAT(fd, &stbuf);

	dest = pmem_map(fd);
	if (dest == NULL)
		FATAL("!Could not mmap %s\n", argv[1]);

	/* these will not be made persistent */
	*(int *)dest = 4;

	/* this will be made persistent */
	uint64_t *tmp64dst = (void *)((uintptr_t)dest + 4096);
	*tmp64dst = 50;

	if (pmem_is_pmem(dest, sizeof (*tmp64dst))) {
		pmem_persist(tmp64dst, sizeof (*tmp64dst));
	} else {
		pmem_msync(tmp64dst, sizeof (*tmp64dst));
	}

	uint16_t *tmp16dst = (void *)((uintptr_t)dest + 1024);
	*tmp16dst = 21;
	/* will appear as flushed in valgrind log */
	pmem_flush(tmp16dst, sizeof (*tmp16dst));

	/* shows strange behavior of memset in some cases */
	memset(dest + dest_off, 0, bytes);

	pmem_unmap(dest, stbuf.st_size);

	CLOSE(fd);

	DONE(NULL);
}
int
main(int argc, char *argv[])
{
	size_t mapped_len;
	char *dest;
	int is_pmem;

	START(argc, argv, "pmem_valgr_simple");

	if (argc != 4)
		UT_FATAL("usage: %s file offset length", argv[0]);

	int dest_off = atoi(argv[2]);
	size_t bytes = strtoul(argv[3], NULL, 0);

	dest = pmem_map_file(argv[1], 0, 0, 0, &mapped_len, &is_pmem);
	if (dest == NULL)
		UT_FATAL("!Could not mmap %s\n", argv[1]);

	/* these will not be made persistent */
	*(int *)dest = 4;

	/* this will be made persistent */
	uint64_t *tmp64dst = (void *)((uintptr_t)dest + 4096);
	*tmp64dst = 50;

	if (is_pmem) {
		pmem_persist(tmp64dst, sizeof(*tmp64dst));
	} else {
		pmem_msync(tmp64dst, sizeof(*tmp64dst));
	}

	uint16_t *tmp16dst = (void *)((uintptr_t)dest + 1024);
	*tmp16dst = 21;
	/* will appear as flushed/fenced in valgrind log */
	pmem_flush(tmp16dst, sizeof(*tmp16dst));

	/* shows strange behavior of memset in some cases */
	memset(dest + dest_off, 0, bytes);

	pmem_unmap(dest, mapped_len);

	DONE(NULL);
}
void StorageManager::Sync(BackendType type, void *address, size_t length) {
  switch (type) {
    case BACKEND_TYPE_MM: {
      // Nothing to do here
    } break;

    case BACKEND_TYPE_FILE: {
      // flush writes for persistence
      if (is_pmem)
        pmem_persist(address, length);
      else
        pmem_msync(address, length);
    } break;

    case BACKEND_TYPE_INVALID:
    default: {
      // Nothing to do here
    } break;
  }
}
int
main(int argc, char *argv[])
{
	int opt;
	int iflag = 0;
	unsigned long icount;
	const char *path;
	struct stat stbuf;
	size_t size;
	int fd;
	char *pmaddr;

	Myname = argv[0];
	while ((opt = getopt(argc, argv, "FMdi:")) != -1) {
		switch (opt) {
		case 'F':
			pmem_fit_mode();
			break;

		case 'M':
			pmem_msync_mode();
			break;

		case 'd':
			Debug++;
			break;

		case 'i':
			iflag++;
			icount = strtoul(optarg, NULL, 10);
			break;

		default:
			USAGE(NULL);
		}
	}

	if (optind >= argc)
		USAGE("No path given");
	path = argv[optind++];

	if (stat(path, &stbuf) < 0) {
		/*
		 * file didn't exist, create it with DEFAULT_SIZE
		 */
		if ((fd = open(path, O_CREAT|O_RDWR, 0666)) < 0)
			FATALSYS("can't create %s", path);
		if ((errno = posix_fallocate(fd, 0, DEFAULT_SIZE)) != 0)
			FATALSYS("posix_fallocate");
		size = DEFAULT_SIZE;
	} else {
		/*
		 * file exists, just open it
		 */
		if ((fd = open(path, O_RDWR)) < 0)
			FATALSYS("open %s", path);
		size = stbuf.st_size;
	}

	/*
	 * map the file into our address space.
	 */
	if ((pmaddr = pmem_map(fd, size)) == NULL)
		FATALSYS("pmem_map");

	if (optind < argc) {	/* strings supplied as arguments? */
		int i;
		char *ptr = pmaddr;

		if (iflag)
			icount_start(icount);	/* start instruction count */

		for (i = optind; i < argc; i++) {
			size_t len = strlen(argv[i]) + 1; /* includes '\0' */

			if (len > size)
				FATAL("no more room for %d-byte string", len);

			/* store to Persistent Memory */
			strcpy(ptr, argv[i]);

			/* make that change durable */
			pmem_persist(ptr, len, 0);

			ptr += len;
			size -= len;
		}

		if (iflag) {
			icount_stop();		/* end instruction count */
			printf("Total instruction count: %lu\n",
					icount_total());
		}
	} else {
		char *ptr = pmaddr;
		char *sep = "";

		/*
		 * dump out all the strings we find in Persistent Memory
		 */
		while (ptr < &pmaddr[size]) {
			/* load from Persistent Memory */
			if (isprint(*ptr)) {
				putc(*ptr, stdout);
				sep = "\n";
			} else if (*ptr == '\0') {
				fputs(sep, stdout);
				sep = "";
			}
			ptr++;
		}
	}

	exit(0);
}
/*
 * rpmemd_pmem_persist -- pmem_persist wrapper required to unify function
 * pointer type with pmem_msync
 */
int
rpmemd_pmem_persist(const void *addr, size_t len)
{
	pmem_persist(addr, len);
	return 0;
}
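The wrapper exists because pmem_persist() returns void while pmem_msync() returns int, so a single function-pointer type cannot refer to both directly. A hedged sketch of how such a pointer might be chosen and used follows; persist_fn, choose_persist, and write_field are illustrative names, not rpmemd code.

/*
 * sketch: pick one persist routine up front, call it through a single
 * int (*)(const void *, size_t) pointer afterwards.
 */
typedef int (*persist_fn)(const void *addr, size_t len);

static persist_fn
choose_persist(int is_pmem)
{
	/*
	 * pmem_msync() already matches the pointer type; pmem_persist()
	 * returns void, hence the rpmemd_pmem_persist() wrapper above.
	 */
	return is_pmem ? rpmemd_pmem_persist : pmem_msync;
}

static void
write_field(uint64_t *field, uint64_t value, persist_fn persist)
{
	*field = value;
	persist(field, sizeof(*field));
}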
/*
 * pmemalloc_recover -- recover after a possible crash
 *
 * Internal support routine, used during recovery.
 */
static void
pmemalloc_recover(void *pmp)
{
	struct clump *clp;
	struct pool_header *phdr;
	int i;

	DEBUG("pmp=0x%lx", pmp);

	clp = PMEM(pmp, (struct clump *)PMEM_CLUMP_OFFSET);
	phdr = PMEM(pmp, (struct pool_header *)PMEM_HDR_OFFSET);

	while (clp->size) {
		size_t sz = clp->size & ~PMEM_STATE_MASK;
		int state = clp->size & PMEM_STATE_MASK;

		DEBUG("[0x%lx]clump size %lx state %d",
				OFF(pmp, clp), sz, state);

		switch (state) {
		case PMEM_STATE_RESERVED:
			/* return the clump to the FREE pool */
			for (i = PMEM_NUM_ON - 1; i >= 0; i--)
				clp->on[i].off = 0;
			pmem_persist(clp, sizeof(*clp), 0);
			clp->size = sz | PMEM_STATE_FREE;
			pmem_persist(clp, sizeof(*clp), 0);
			break;

		case PMEM_STATE_ACTIVATING:
			/* finish progressing the clump to ACTIVE */
			for (i = 0; i < PMEM_NUM_ON; i++)
				if (clp->on[i].off) {
					uintptr_t *dest = PMEM(pmp,
						(uintptr_t *)clp->on[i].off);
					*dest = (uintptr_t)clp->on[i].ptr_;
					pmem_persist(dest, sizeof(*dest), 0);
				} else
					break;
			for (i = PMEM_NUM_ON - 1; i >= 0; i--)
				clp->on[i].off = 0;
			pmem_persist(clp, sizeof(*clp), 0);
			clp->size = sz | PMEM_STATE_ACTIVE;
			pmem_persist(clp, sizeof(*clp), 0);
			break;

		case PMEM_STATE_FREEING:
			/* finish progressing the clump to FREE */
			for (i = 0; i < PMEM_NUM_ON; i++)
				if (clp->on[i].off) {
					uintptr_t *dest = PMEM(pmp,
						(uintptr_t *)clp->on[i].off);
					*dest = (uintptr_t)clp->on[i].ptr_;
					pmem_persist(dest, sizeof(*dest), 0);
				} else
					break;
			for (i = PMEM_NUM_ON - 1; i >= 0; i--)
				clp->on[i].off = 0;
			pmem_persist(clp, sizeof(*clp), 0);
			clp->size = sz | PMEM_STATE_FREE;
			pmem_persist(clp, sizeof(*clp), 0);
			break;
		}

		clp = (struct clump *)((uintptr_t)clp + sz);
		DEBUG("next clp %lx, offset 0x%lx", clp, OFF(pmp, clp));
	}

	pthread_mutex_init(&phdr->pool_lock, NULL);
	pthread_mutex_init(&phdr->activation_lock, NULL);
	pmem_persist(phdr, sizeof(*phdr), 0);
}
/*
 * pmemalloc_reserve -- allocate memory, volatile until pmemalloc_activate()
 *
 * Inputs:
 *	pmp -- a pmp as returned by pmemalloc_init()
 *
 *	size -- number of bytes to allocate
 *
 * Outputs:
 *	On success, this function returns memory allocated from the
 *	memory-mapped file associated with pmp. The memory is suitably
 *	aligned for any kind of variable. The memory is not initialized.
 *
 *	On failure, this function returns NULL and errno is set.
 *
 * The memory returned is initially *volatile* meaning that if the
 * program exits (or system crashes) before pmemalloc_activate() is called
 * with the return value, it is considered incompletely allocated
 * and the memory is returned to the free pool in the memory-mapped
 * file. It works this way to prevent memory leaks when the system
 * crashes between a successful return from pmemalloc_reserve() and when
 * the caller actually links something to point at the new memory.
 * The basic pattern for using pmemalloc_reserve() is this:
 *
 *	np_ = pmemalloc_reserve(pmp, sizeof(*np_));
 *	...fill in fields in *np_...
 *	pmemalloc_onactive(pmp, np_, &parent->next_, np_);
 *	pmemalloc_activate(pmp, np_);
 *
 * In addition to flushing the data at *np_ to persistence, the
 * pmemalloc_activate() call above also atomically marks that memory
 * as in-use and stores np_ into the persistent-memory-based pointer
 * (parent->next_ in this example). So any crash that happens before
 * parent->next_ is set to point at the new memory will result in the
 * memory being returned back to the free list.
 */
void *
pmemalloc_reserve(void *pmp, size_t size)
{
	size_t nsize = roundup(size + PMEM_CHUNK_SIZE, PMEM_CHUNK_SIZE);
	struct clump *clp;
	struct pool_header *phdr;

	DEBUG("pmp=0x%lx, size=0x%lx -> 0x%lx", pmp, size, nsize);

	clp = PMEM(pmp, (struct clump *)PMEM_CLUMP_OFFSET);
	phdr = PMEM(pmp, (struct pool_header *)PMEM_HDR_OFFSET);

	if (clp->size == 0)
		FATAL("no clumps found");

	pthread_mutex_lock(&phdr->pool_lock);

	/* first fit */
	while (clp->size) {
		size_t sz = clp->size & ~PMEM_STATE_MASK;
		int state = clp->size & PMEM_STATE_MASK;

		DEBUG("[0x%lx] clump size 0x%lx state %d",
				OFF(pmp, clp), sz, state);

		if (state == PMEM_STATE_FREE && nsize <= sz) {
			void *ptr = (void *)
				(uintptr_t)clp + PMEM_CHUNK_SIZE -
				(uintptr_t)pmp;
			size_t leftover = sz - nsize;

			DEBUG("fit found ptr 0x%lx, leftover 0x%lx bytes",
					ptr, leftover);
			if (leftover >= PMEM_CHUNK_SIZE * 2) {
				struct clump *newclp;
				int i;

				newclp = (struct clump *)
					((uintptr_t)clp + nsize);

				DEBUG("splitting: [0x%lx] new clump",
						OFF(pmp, newclp));
				/*
				 * can go ahead and start fiddling with
				 * this freely since it is in the middle
				 * of a free clump until we change fields
				 * in *clp.  order here is important:
				 * 1. initialize new clump
				 * 2. persist new clump
				 * 3. initialize existing clump do list
				 * 4. persist existing clump
				 * 5. set new clump size, RESERVED
				 * 6. persist existing clump
				 */
				memset(newclp, '\0', sizeof(*newclp));
				newclp->size = leftover | PMEM_STATE_FREE;
				pmem_persist(newclp, sizeof(*newclp), 0);
				for (i = 0; i < PMEM_NUM_ON; i++) {
					clp->on[i].off = 0;
					clp->on[i].ptr_ = 0;
				}
				pmem_persist(clp, sizeof(*clp), 0);
				clp->size = nsize | PMEM_STATE_RESERVED;
				pmem_persist(clp, sizeof(*clp), 0);
			} else {
				int i;

				DEBUG("no split required");

				for (i = 0; i < PMEM_NUM_ON; i++) {
					clp->on[i].off = 0;
					clp->on[i].ptr_ = 0;
				}
				pmem_persist(clp, sizeof(*clp), 0);
				clp->size = sz | PMEM_STATE_RESERVED;
				pmem_persist(clp, sizeof(*clp), 0);
			}

			pthread_mutex_unlock(&phdr->pool_lock);
			return ptr;
		}

		clp = (struct clump *)((uintptr_t)clp + sz);
		DEBUG("[0x%lx] next clump", OFF(pmp, clp));
	}

	pthread_mutex_unlock(&phdr->pool_lock);

	DEBUG("no free memory of size %lu available", nsize);
	errno = ENOMEM;
	return NULL;
}
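A slightly fuller, hedged sketch of the reserve/onactive/activate pattern described in the comment on pmemalloc_reserve() above, showing an atomic push onto a persistent list. struct node, list_push, and headp are hypothetical; only the pmemalloc_* calls and the PMEM() relative-to-absolute conversion come from the sources above.

/*
 * list_push -- (sketch) atomically allocate and link a new node
 *
 * headp is an absolute pointer to the persistent head pointer stored in
 * the pool; a crash before pmemalloc_activate() completes leaves the list
 * unchanged and the reserved clump is reclaimed during recovery.
 */
struct node {
	uint64_t value;
	struct node *next_;	/* relative pointer within the pool */
};

static struct node *
list_push(void *pmp, struct node **headp, uint64_t value)
{
	struct node *np_;	/* relative pointer, as returned by reserve */

	if ((np_ = pmemalloc_reserve(pmp, sizeof(*np_))) == NULL)
		return NULL;

	/* fill in the fields via an absolute pointer; still volatile */
	struct node *np = PMEM(pmp, np_);
	np->value = value;
	np->next_ = *headp;

	/* on activation, atomically store np_ into the persistent head */
	pmemalloc_onactive(pmp, np_, (void **)headp, np_);
	pmemalloc_activate(pmp, np_);

	return np_;
}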
/*
 * pmemalloc_free -- free memory
 *
 * Inputs:
 *	pmp -- a pmp as returned by pmemalloc_init()
 *
 *	ptr_ -- memory to be freed, as returned by pmemalloc_reserve()
 */
void
pmemalloc_free(void *pmp, void *ptr_)
{
	struct clump *clp;
	size_t sz;
	int state;
	int i;
	struct pool_header *phdr;

	DEBUG("pmp=%lx, ptr_=%lx", pmp, ptr_);

	clp = PMEM(pmp, (struct clump *)((uintptr_t)ptr_ - PMEM_CHUNK_SIZE));
	phdr = PMEM(pmp, (struct pool_header *)PMEM_HDR_OFFSET);

	DEBUG("[0x%lx] clump on: 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx",
			OFF(pmp, clp),
			clp->on[0].off, clp->on[0].ptr_,
			clp->on[1].off, clp->on[1].ptr_,
			clp->on[2].off, clp->on[2].ptr_);

	pthread_mutex_lock(&phdr->activation_lock);

	sz = clp->size & ~PMEM_STATE_MASK;
	state = clp->size & PMEM_STATE_MASK;

	if (state != PMEM_STATE_RESERVED && state != PMEM_STATE_ACTIVE)
		FATAL("freeing clump in bad state: %d", state);

	if (state == PMEM_STATE_ACTIVE) {
		/*
		 * order here is important:
		 * 1. set state to FREEING
		 * 2. persist *clp (now we're committed towards STATE_FREE)
		 * 3. execute onfree stores, persisting each one
		 * 4. set state to FREE
		 * 5. persist *clp
		 */
		clp->size = sz | PMEM_STATE_FREEING;
		pmem_persist(clp, sizeof(*clp), 0);
		for (i = 0; i < PMEM_NUM_ON; i++)
			if (clp->on[i].off) {
				uintptr_t *dest =
					PMEM(pmp, (uintptr_t *)clp->on[i].off);
				*dest = (uintptr_t)clp->on[i].ptr_;
				pmem_persist(dest, sizeof(*dest), 0);
			} else
				break;
		for (i = PMEM_NUM_ON - 1; i >= 0; i--)
			clp->on[i].off = 0;
		pmem_persist(clp, sizeof(*clp), 0);
	}
	clp->size = sz | PMEM_STATE_FREE;
	pmem_persist(clp, sizeof(*clp), 0);

	pthread_mutex_unlock(&phdr->activation_lock);

	/*
	 * at this point we may have adjacent free clumps that need
	 * to be coalesced. there are three interesting cases:
	 * case 1: the clump below us is free (need to combine two clumps)
	 * case 2: the clump above us is free (need to combine two clumps)
	 * case 3: both are free (need to combine three clumps)
	 * XXX this can be much more optimal by using clp->prevsize to
	 * get back to the clump below us. for now, we just invoke
	 * the recovery code for coalescing.
	 */
	pmemalloc_coalesce_free(pmp);
}
// pmemalloc_free -- free memory, find adjacent free blocks and coalesce them
static void pmemalloc_free(void *abs_ptr_) {

	if (abs_ptr_ == NULL)
		return;

	struct clump *clp, *firstfree, *lastfree, *next_clp;
	int first = 1, last = 1;
	size_t csize;
	size_t sz;

	firstfree = lastfree = NULL;
	csize = 0;

	DEBUG("ptr_=%lx", abs_ptr_);

	clp = (struct clump *) ((uintptr_t) abs_ptr_ - PMEM_CHUNK_SIZE);
	sz = clp->size & ~PMEM_STATE_MASK;
	DEBUG("size=%lu", sz);

	lastfree = (struct clump *) ((uintptr_t) clp + sz);
	//DEBUG("validate lastfree %p", REL_PTR(lastfree));
	if ((lastfree->size & PMEM_STATE_MASK) != PMEM_STATE_FREE)
		last = 0;

	firstfree = (struct clump *) ((uintptr_t) clp - clp->prevsize);
	//DEBUG("validate firstfree %p", REL_PTR(firstfree));
	if (firstfree == clp ||
	    ((firstfree->size & PMEM_STATE_MASK) != PMEM_STATE_FREE))
		first = 0;

	if (first && last) {
		DEBUG("******* F C L ");
		size_t first_sz = firstfree->size & ~PMEM_STATE_MASK;
		size_t last_sz = lastfree->size & ~PMEM_STATE_MASK;
		csize = first_sz + sz + last_sz;
		PM_EQU((firstfree->size), (csize | PMEM_STATE_FREE));
		pmem_persist(firstfree, sizeof(*firstfree), 0);

		next_clp = (struct clump *) ((uintptr_t) lastfree + last_sz);
		PM_EQU((next_clp->prevsize), (csize));
		pmem_persist(next_clp, sizeof(*next_clp), 0);

		prev_clp = firstfree;
		//DEBUG("validate firstfree %p", REL_PTR(firstfree));
	} else if (first) {
		DEBUG("******* F C ");
		size_t first_sz = firstfree->size & ~PMEM_STATE_MASK;
		csize = first_sz + sz;
		PM_EQU((firstfree->size), (csize | PMEM_STATE_FREE));
		pmem_persist(firstfree, sizeof(*firstfree), 0);

		next_clp = lastfree;
		PM_EQU((next_clp->prevsize), (csize));
		pmem_persist(next_clp, sizeof(*next_clp), 0);

		prev_clp = firstfree;
		//DEBUG("validate firstfree %p", REL_PTR(firstfree));
		//DEBUG("validate lastfree %p", REL_PTR(firstfree));
	} else if (last) {
		DEBUG("******* C L ");
		size_t last_sz = lastfree->size & ~PMEM_STATE_MASK;
		csize = sz + last_sz;
		PM_EQU((clp->size), (csize | PMEM_STATE_FREE));
		pmem_persist(clp, sizeof(*clp), 0);

		next_clp = (struct clump *) ((uintptr_t) lastfree + last_sz);
		PM_EQU((next_clp->prevsize), (csize));
		pmem_persist(next_clp, sizeof(*next_clp), 0);

		prev_clp = clp;
		//DEBUG("validate firstfree %p", REL_PTR(firstfree));
		//DEBUG("validate clump %p", REL_PTR(clp));
	} else {
		DEBUG("******* C ");
		csize = sz;
		PM_EQU((clp->size), (csize | PMEM_STATE_FREE));
		pmem_persist(clp, sizeof(*clp), 0);
		//DEBUG("validate clump %p", REL_PTR(clp));
	}
}
// pmemalloc_reserve -- allocate memory, volatile until pmemalloc_activate()
static void *pmemalloc_reserve(size_t size) {
	size_t nsize;

	if (size <= 64) {
		nsize = 128;
	} else {
		size_t temp = 63;
		nsize = 64 + ((size + 63) & ~temp);
	}
	//cerr<<"size :: "<<size<<" nsize :: "<<nsize<<endl;

	struct clump *clp;
	struct clump *next_clp;
	int loop = 0;

	DEBUG("size= %zu", nsize);

	if (prev_clp != NULL) {
		clp = prev_clp;
		// printf("prev_clp=%p\n", prev_clp);
	} else {
		clp = (struct clump *)ABS_PTR((struct clump *) PMEM_CLUMP_OFFSET);
	}

	DEBUG("pmp=%p clp= %p, size of clp=%d size of struct clump =%d",
			pmp, clp, sizeof(clp), sizeof(struct clump));

	/* first fit */
check:
	//unsigned int itr = 0;
	while (clp->size) {
		// DEBUG("************** itr :: %lu ", itr++);
		size_t sz = clp->size & ~PMEM_STATE_MASK;
		int state = clp->size & PMEM_STATE_MASK;
		DEBUG("size : %lu state : %d", sz, state);

		if (nsize <= sz) {
			if (state == PMEM_STATE_FREE) {
				void *ptr = (void *) (uintptr_t) clp +
					PMEM_CHUNK_SIZE - (uintptr_t) pmp;
				size_t leftover = sz - nsize;

				DEBUG("fit found ptr 0x%lx, leftover %lu bytes",
						ptr, leftover);
				if (leftover >= PMEM_CHUNK_SIZE * 2) {
					struct clump *newclp;
					newclp = (struct clump *) ((uintptr_t) clp + nsize);

					DEBUG("splitting: [0x%lx] new clump",
							(struct clump *)REL_PTR(newclp));
					/*
					 * can go ahead and start fiddling with
					 * this freely since it is in the middle
					 * of a free clump until we change fields
					 * in *clp.  order here is important:
					 * 1. initialize new clump
					 * 2. persist new clump
					 * 3. initialize existing clump do list
					 * 4. persist existing clump
					 * 5. set new clump size, RESERVED
					 * 6. persist existing clump
					 */
					PM_EQU((newclp->size), (leftover | PMEM_STATE_FREE));
					PM_EQU((newclp->prevsize), (nsize));
					pmem_persist(newclp, sizeof(*newclp), 0);

					next_clp = (struct clump *) ((uintptr_t) newclp + leftover);
					PM_EQU((next_clp->prevsize), (leftover));
					pmem_persist(next_clp, sizeof(*next_clp), 0);

					PM_EQU((clp->size), (nsize | PMEM_STATE_RESERVED));
					pmem_persist(clp, sizeof(*clp), 0);

					//DEBUG("validate new clump %p", REL_PTR(newclp));
					//DEBUG("validate orig clump %p", REL_PTR(clp));
					//DEBUG("validate next clump %p", REL_PTR(next_clp));
				} else {
					DEBUG("no split required");

					PM_EQU((clp->size), (sz | PMEM_STATE_RESERVED));
					pmem_persist(clp, sizeof(*clp), 0);

					next_clp = (struct clump *) ((uintptr_t) clp + sz);
					PM_EQU((next_clp->prevsize), (sz));
					pmem_persist(next_clp, sizeof(*next_clp), 0);

					//DEBUG("validate orig clump %p", REL_PTR(clp));
					//DEBUG("validate next clump %p", REL_PTR(next_clp));
				}

				prev_clp = clp;
				return ABS_PTR(ptr);
			}
		}

		clp = (struct clump *) ((uintptr_t) clp + sz);
		DEBUG("next clump :: [0x%lx]", (struct clump *)REL_PTR(clp));
	}

	if (loop == 0) {
		DEBUG("LOOP ");
		loop = 1;
		clp = (struct clump *)ABS_PTR((struct clump *) PMEM_CLUMP_OFFSET);
		goto check;
	}

	printf("no free memory of size %lu available \n", nsize);
	printf("Increase the size of the PM pool:\n");
	printf("Increase PSEGMENT_RESERVED_REGION_SIZE in benchmarks/echo/echo/include/pm_instr.h\n");
	//display();
	errno = ENOMEM;
	exit(EXIT_FAILURE);
	return NULL;
}
int
main(int argc, char *argv[])
{
	const char *path;
	int opt;
	int retval;
	unsigned long thrd;
	int mbx;
	void **sa_ptr;
	mailbox_array_t *mbx_offset_;

	Myname = argv[0];

	while ((opt = getopt(argc, argv, "t:r:s:d")) != -1) {
		switch (opt) {
		case 't':
			if (sscanf(optarg, "%u", &num_threads) == EOF) {
				USAGE("-t option error");
			}
			if (num_threads > MAX_THREADS) {
				fprintf(stderr, "using max threads %d\n",
						MAX_THREADS);
				num_threads = MAX_THREADS;
			}
			break;

		case 'r':
			if (sscanf(optarg, "%u", &runtime) == EOF) {
				USAGE("-r option error");
			}
			break;

		case 's':
			if (sscanf(optarg, "%u", &max_malloc) == EOF)
				USAGE("-s option error");
			break;

		case 'd':
			Debug = TRUE;
			break;

		default:
			USAGE(NULL);
		}
	} /* end while opt */

	if (optind >= argc)
		USAGE("No path given");
	path = argv[optind++];

	if (optind < argc)
		USAGE(NULL);

	/*
	 * Use the alloc_init lib function to open the pool
	 * via pmfs, and map it into our address space.
	 * This returns a regular (absolute) pointer.
	 */
	if ((pmp = pmemalloc_init(path, POOL_SIZE)) == NULL)
		FATALSYS("pmemalloc_init on %s", path);

	/*
	 * Fetch our static info.
	 * The first word is used to store a relative pointer to
	 * the mailbox array. The library function converts this
	 * to an absolute pointer.
	 */
	sa_ptr = (void **)pmemalloc_static_area(pmp);

	/* The static area for a new pmem pool is zero'd */
	if (*sa_ptr == NULL) {
		/*
		 * Create and initialize the mailbox array in PM
		 */
		if ((mbx_offset_ = pmemalloc_reserve(pmp,
				sizeof(mailbox_array_t))) == NULL)
			FATALSYS("pmemalloc mailbox array");

		/*
		 * Place a pointer to this array in the first word of the
		 * static area on activation
		 */
		pmemalloc_onactive(pmp, mbx_offset_, (void **)sa_ptr,
				mbx_offset_);
		pmemalloc_activate(pmp, mbx_offset_);

		/* Set the static, regular pointer to be used in the program */
		mbx_array_ptr = PMEM(pmp, mbx_offset_);

		for (thrd = 0; thrd < MAX_THREADS; ++thrd) {
			for (mbx = 0; mbx < MAILBOXES; ++mbx) {
				(*mbx_array_ptr)[thrd][mbx] = NULL;
			}
		}
	} else {
		/*
		 * This region already exists from a previous run.
		 * Free any pmem spaces still in the mailbox.
		 */
		mbx_array_ptr = PMEM(pmp, (mailbox_array_t *)*sa_ptr);

		for (thrd = 0; thrd < MAX_THREADS; ++thrd) {
			for (mbx = 0; mbx < MAILBOXES; ++mbx) {
				if ((*mbx_array_ptr)[thrd][mbx] != NULL) {
					pmemalloc_onfree(pmp,
						(*mbx_array_ptr)[thrd][mbx],
						&(*mbx_array_ptr)[thrd][mbx],
						NULL);
					pmemalloc_free(pmp,
						(*mbx_array_ptr)[thrd][mbx]);
				}
			}
		}
	}

	/* Commit the initialized mailbox to persistent media */
	pmem_persist(mbx_array_ptr, sizeof(mailbox_array_t), 0);

	DEBUG("Number of threads = %d", num_threads);
	DEBUG("Runtime: %d seconds", runtime);
	DEBUG("Max alloc size %d bytes", max_malloc);

	/*
	 * Create each allocating thread. Each allocating thread
	 * will create its corresponding freeing thread.
	 * Once each thread is created, signal the start condition
	 * so they all start running around the same time.
	 */
	for (thrd = 0; thrd < num_threads; ++thrd) {
		retval = pthread_create(&alloc_threads[thrd], NULL,
				alloc_main, (void *)thrd);
		if (retval) {
			errno = retval;
			FATALSYS("alloc thread create %d\n", thrd);
		}
	}

	/* Give the new threads a chance to start */
	sleep(0);

	pthread_mutex_lock(&start_lock);
	b_start_flag = TRUE;
	pthread_cond_broadcast(&start_cv);
	pthread_mutex_unlock(&start_lock);

	/* Let run for the desired seconds then tell all threads to stop */
	sleep(runtime);
	b_all_stop = TRUE;

	/* Wait for each allocating thread to complete. */
	for (thrd = 0; thrd < num_threads; ++thrd) {
		retval = pthread_join(alloc_threads[thrd], NULL);
		if (retval) {
			errno = retval;
			FATALSYS("Allocating thread JOIN %d", thrd);
		}
	}

	/* Commit the final mailbox array to persistent media */
	pmem_persist(mbx_array_ptr, sizeof(mailbox_array_t), 0);

	DEBUG("Done.");
	exit(0);
}