/*
 * pmemobj_create -- create a transactional memory pool (set)
 *
 * path     - path to the pool file or pool-set description file
 * layout   - optional layout-name string stored in the pool descriptor;
 *            may be NULL, otherwise must be shorter than PMEMOBJ_MAX_LAYOUT
 * poolsize - requested pool size in bytes (0 presumably means "use the
 *            existing file size" -- handled inside util_pool_create)
 * mode     - permission bits applied to the created file(s)
 *
 * Returns the run-time pool handle (address of the master replica's
 * mapped header) on success, or NULL with errno set on failure.
 */
PMEMobjpool *
pmemobj_create(const char *path, const char *layout, size_t poolsize,
	mode_t mode)
{
	LOG(3, "path %s layout %s poolsize %zu mode %o",
		path, layout, poolsize, mode);

	/* check length of layout */
	if (layout && (strlen(layout) >= PMEMOBJ_MAX_LAYOUT)) {
		ERR("Layout too long");
		errno = EINVAL;
		return NULL;
	}

	/*
	 * Create the pool set: maps every part of every replica and
	 * writes/validates the pool headers (signature + format versions).
	 * The minimum part size reserves at least one page-aligned
	 * pmemobjpool header at the start of each replica.
	 */
	struct pool_set *set;
	if (util_pool_create(&set, path, poolsize, PMEMOBJ_MIN_POOL,
			roundup(sizeof (struct pmemobjpool), Pagesize),
			OBJ_HDR_SIG, OBJ_FORMAT_MAJOR,
			OBJ_FORMAT_COMPAT, OBJ_FORMAT_INCOMPAT,
			OBJ_FORMAT_RO_COMPAT) != 0) {
		LOG(2, "cannot create pool or pool set");
		return NULL;
	}

	ASSERT(set->nreplicas > 0);

	PMEMobjpool *pop;
	/* initialize each replica's on-media descriptor and runtime state */
	for (unsigned r = 0; r < set->nreplicas; r++) {
		struct pool_replica *rep = set->replica[r];
		/* the pmemobjpool header lives at the start of part 0 */
		pop = rep->part[0].addr;

		/*
		 * Tell Valgrind the runtime portion of the header (everything
		 * from 'addr' onward, past the on-media 'hdr') is ordinary
		 * volatile memory, not tracked pmem.
		 */
		VALGRIND_REMOVE_PMEM_MAPPING(&pop->addr,
			sizeof (struct pmemobjpool) -
			((uintptr_t)&pop->addr - (uintptr_t)&pop->hdr));

		/* cache this replica's own mapping address and size */
		pop->addr = pop;
		pop->size = rep->repsize;

		/* create pool descriptor */
		if (pmemobj_descr_create(pop, layout, set->poolsize) != 0) {
			LOG(2, "descriptor creation failed");
			goto err;
		}

		/* initialize replica runtime - is_pmem, funcs, ... */
		if (pmemobj_replica_init(pop, rep->is_pmem) != 0) {
			ERR("pool initialization failed");
			goto err;
		}

		/* link replicas: each one points at the next (last gets none) */
		if (r < set->nreplicas - 1)
			pop->replica = set->replica[r + 1]->part[0].addr;
	}

	/* replica 0 is the master; only it runs the full runtime below */
	pop = set->replica[0]->part[0].addr;
	pop->is_master_replica = 1;

	for (unsigned r = 1; r < set->nreplicas; r++) {
		PMEMobjpool *rep = set->replica[r]->part[0].addr;
		rep->is_master_replica = 0;
	}

	VALGRIND_DO_CREATE_MEMPOOL(pop, 0, 0);

	/* initialize runtime parts - lanes, obj stores, ... */
	if (pmemobj_runtime_init(pop, 0, 1 /* boot */) != 0) {
		ERR("pool initialization failed");
		goto err;
	}

	/* apply the requested permissions to all parts of the set */
	if (util_poolset_chmod(set, mode))
		goto err;

	/* mappings stay alive; only the fds and the set bookkeeping go away */
	util_poolset_fdclose(set);

	util_poolset_free(set);

	LOG(3, "pop %p", pop);
	return pop;

err:
	LOG(4, "error clean up");
	/* preserve the original cause across the cleanup calls */
	int oerrno = errno;
	/*
	 * NOTE(review): second arg 1 here (vs 0 in pmemobj_open_common's
	 * error path) presumably deletes the just-created part files --
	 * confirm against util_poolset_close().
	 */
	util_poolset_close(set, 1);
	errno = oerrno;
	return NULL;
}
/*
 * pmemobj_open_common -- open a transactional memory pool (set)
 *
 * This routine does all the work, but takes a cow flag so internal
 * calls can map a read-only pool if required.
 *
 * path   - path to the pool file or pool-set description file
 * layout - expected layout name, checked against the pool descriptor
 * cow    - nonzero maps the pool copy-on-write (private mappings)
 * boot   - nonzero runs the full runtime boot (recovery, valgrind init)
 *
 * Returns the pool handle (master replica) on success, or NULL with
 * errno set on failure.
 */
static PMEMobjpool *
pmemobj_open_common(const char *path, const char *layout, int cow, int boot)
{
	LOG(3, "path %s layout %s cow %d", path, layout, cow);

	/* map all parts of all replicas and validate the pool headers */
	struct pool_set *set;
	if (util_pool_open(&set, path, cow, PMEMOBJ_MIN_POOL,
			roundup(sizeof (struct pmemobjpool), Pagesize),
			OBJ_HDR_SIG, OBJ_FORMAT_MAJOR,
			OBJ_FORMAT_COMPAT, OBJ_FORMAT_INCOMPAT,
			OBJ_FORMAT_RO_COMPAT) != 0) {
		LOG(2, "cannot open pool or pool set");
		return NULL;
	}

	ASSERT(set->nreplicas > 0);

	/* read-only mode is not supported in libpmemobj */
	if (set->rdonly) {
		ERR("read-only mode is not supported");
		errno = EINVAL;
		goto err;
	}

	PMEMobjpool *pop;
	/* validate descriptor and set up runtime state for each replica */
	for (unsigned r = 0; r < set->nreplicas; r++) {
		struct pool_replica *rep = set->replica[r];
		/* the pmemobjpool header lives at the start of part 0 */
		pop = rep->part[0].addr;

		/*
		 * Mark the runtime portion of the header (from 'addr' past
		 * the on-media 'hdr') as plain volatile memory for Valgrind.
		 */
		VALGRIND_REMOVE_PMEM_MAPPING(&pop->addr,
			sizeof (struct pmemobjpool) -
			((uintptr_t)&pop->addr - (uintptr_t)&pop->hdr));

		/* cache this replica's own mapping address and size */
		pop->addr = pop;
		pop->size = rep->repsize;

		/* verify the descriptor matches the requested layout */
		if (pmemobj_descr_check(pop, layout, set->poolsize) != 0) {
			LOG(2, "descriptor check failed");
			goto err;
		}

		/* initialize replica runtime - is_pmem, funcs, ... */
		if (pmemobj_replica_init(pop, rep->is_pmem) != 0) {
			ERR("pool initialization failed");
			goto err;
		}

		/* link replicas: each one points at the next (last gets none) */
		if (r < set->nreplicas - 1)
			pop->replica = set->replica[r + 1]->part[0].addr;
	}

	/*
	 * If there is more than one replica, check if all of them are
	 * consistent (recoverable).
	 * On success, choose any replica and copy entire lanes (redo logs)
	 * to all the other replicas to synchronize them.
	 */
	if (set->nreplicas > 1) {
		/* pmemobj_check_basic() returns 0 when inconsistent */
		for (unsigned r = 0; r < set->nreplicas; r++) {
			pop = set->replica[r]->part[0].addr;
			if (pmemobj_check_basic(pop) == 0) {
				ERR("inconsistent replica #%u", r);
				goto err;
			}
		}

		/* copy lanes: replica 0 is the source of truth */
		pop = set->replica[0]->part[0].addr;
		void *src = (void *)((uintptr_t)pop + pop->lanes_offset);
		size_t len = pop->nlanes * sizeof (struct lane_layout);

		for (unsigned r = 1; r < set->nreplicas; r++) {
			pop = set->replica[r]->part[0].addr;
			void *dst = (void *)((uintptr_t)pop +
						pop->lanes_offset);
			/* persistent copy local to the destination replica */
			pop->memcpy_persist_local(dst, src, len);
		}
	}

	/* replica 0 is the master; only it runs the full runtime below */
	pop = set->replica[0]->part[0].addr;
	pop->is_master_replica = 1;

	for (unsigned r = 1; r < set->nreplicas; r++) {
		PMEMobjpool *rep = set->replica[r]->part[0].addr;
		rep->is_master_replica = 0;
	}

#ifdef USE_VG_MEMCHECK
	/* teach memcheck about the heap layout before any allocation runs */
	heap_vg_open(pop);
#endif

	VALGRIND_DO_CREATE_MEMPOOL(pop, 0, 0);

	/* initialize runtime parts - lanes, obj stores, ... */
	if (pmemobj_runtime_init(pop, 0, boot) != 0) {
		ERR("pool initialization failed");
		goto err;
	}

	/* mappings stay alive; only the fds and the set bookkeeping go away */
	util_poolset_fdclose(set);

	util_poolset_free(set);

#ifdef USE_VG_MEMCHECK
	if (boot)
		pmemobj_vg_boot(pop);
#endif

	LOG(3, "pop %p", pop);
	return pop;

err:
	LOG(4, "error clean up");
	/* preserve the original cause across the cleanup calls */
	int oerrno = errno;
	/* second arg 0: close/unmap only, never delete the existing files */
	util_poolset_close(set, 0);
	errno = oerrno;
	return NULL;
}
/* * heap_boot -- opens the heap region of the pmemobj pool * * If successful function returns zero. Otherwise an error number is returned. */ int heap_boot(struct palloc_heap *heap, void *heap_start, uint64_t heap_size, uint64_t run_id, void *base, struct pmem_ops *p_ops) { struct heap_rt *h = Malloc(sizeof(*h)); int err; if (h == NULL) { err = ENOMEM; goto error_heap_malloc; } h->alloc_classes = alloc_class_collection_new(); if (h->alloc_classes == NULL) { err = ENOMEM; goto error_alloc_classes_new; } h->ncaches = heap_get_ncaches(); h->caches = Malloc(sizeof(struct bucket_cache) * h->ncaches); if (h->caches == NULL) { err = ENOMEM; goto error_heap_cache_malloc; } h->max_zone = heap_max_zone(heap_size); h->zones_exhausted = 0; for (int i = 0; i < MAX_RUN_LOCKS; ++i) util_mutex_init(&h->run_locks[i], NULL); heap->run_id = run_id; heap->p_ops = *p_ops; heap->layout = heap_start; heap->rt = h; heap->size = heap_size; heap->base = base; VALGRIND_DO_CREATE_MEMPOOL(heap->layout, 0, 0); for (unsigned i = 0; i < h->ncaches; ++i) bucket_group_init(h->caches[i].buckets); size_t rec_i; for (rec_i = 0; rec_i < MAX_ALLOCATION_CLASSES; ++rec_i) { if ((h->recyclers[rec_i] = recycler_new(heap)) == NULL) { err = ENOMEM; goto error_recycler_new; } } return 0; error_recycler_new: Free(h->caches); for (size_t i = 0; i < rec_i; ++i) recycler_delete(h->recyclers[i]); error_heap_cache_malloc: alloc_class_collection_delete(h->alloc_classes); error_alloc_classes_new: Free(h); heap->rt = NULL; error_heap_malloc: return err; }