/*
 * vmem_init -- initialization for vmem
 *
 * Called automatically by the run-time loader or on the first use of vmem.
 */
void
vmem_init(void)
{
    static bool initialized = false;
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    if (initialized)
        return;

    util_mutex_lock(&lock);

    if (!initialized) {
        out_init(VMEM_LOG_PREFIX, VMEM_LOG_LEVEL_VAR,
                VMEM_LOG_FILE_VAR, VMEM_MAJOR_VERSION,
                VMEM_MINOR_VERSION);
        out_set_vsnprintf_func(je_vmem_navsnprintf);
        LOG(3, NULL);
        util_init();
        Header_size = roundup(sizeof(VMEM), Pagesize);

        /* Set up jemalloc messages to a custom print function */
        je_vmem_malloc_message = print_jemalloc_messages;

        initialized = true;
    }

    util_mutex_unlock(&lock);
}
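/*
 * Hedged sketch (not part of the excerpt above): one way the "called
 * automatically by the run-time loader" behaviour could be arranged with
 * GCC/Clang is a library constructor.  The hook name vmem_ctor is purely
 * illustrative; the actual mechanism used by the library may differ.
 */
__attribute__((constructor))
static void
vmem_ctor(void)
{
    vmem_init();    /* idempotent thanks to the double-checked lock above */
}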
/**
 * get the logfile destination
 */
int err_getdest(void) {
    int dest;

    util_mutex_lock(l_err);
    dest = err_logdest;
    util_mutex_unlock(l_err);

    return dest;
}
/**
 * get current debug level
 */
int err_getlevel(void) {
    int level;

    util_mutex_lock(l_err);
    level = err_debuglevel;
    util_mutex_unlock(l_err);

    return level;
}
/*
 * palloc_exec_actions -- perform the provided free/alloc operations
 */
static void
palloc_exec_actions(struct palloc_heap *heap, struct operation_context *ctx,
    struct pobj_action_internal *actv, int actvcnt)
{
    /*
     * The operations array is sorted so that proper lock ordering is
     * ensured.
     */
    qsort(actv, (size_t)actvcnt, sizeof(struct pobj_action_internal),
        palloc_action_compare);

    struct pobj_action_internal *act;
    for (int i = 0; i < actvcnt; ++i) {
        act = &actv[i];

        /*
         * This lock must be held for the duration between the creation
         * of the allocation metadata updates in the operation context
         * and the operation processing. This is because a different
         * thread might operate on the same 8-byte value of the run
         * bitmap and override allocation performed by this thread.
         */
        if (i == 0 || act->lock != actv[i - 1].lock) {
            if (act->lock)
                util_mutex_lock(act->lock);
        }

        action_funcs[act->type].exec(heap, act, ctx);
    }

    /* wait for all the headers to be persistent */
    pmemops_drain(&heap->p_ops);

    operation_process(ctx);

    for (int i = 0; i < actvcnt; ++i) {
        act = &actv[i];

        action_funcs[act->type].on_process(heap, act);

        if (i == 0 || act->lock != actv[i - 1].lock) {
            if (act->lock)
                util_mutex_unlock(act->lock);
        }
    }

    for (int i = 0; i < actvcnt; ++i) {
        act = &actv[i];

        action_funcs[act->type].on_unlock(heap, act);
    }
}
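/*
 * Hedged sketch, not taken from the source: palloc_action_compare() is only
 * referenced above, so this illustrates one comparator that would give the
 * lock ordering the comment relies on - actions are ordered by the address
 * of their lock, so every thread acquires the locks in the same global order.
 * Assumes <stdint.h> and the struct pobj_action_internal layout used above.
 */
static int
example_action_compare(const void *lhs, const void *rhs)
{
    const struct pobj_action_internal *l = lhs;
    const struct pobj_action_internal *r = rhs;
    uintptr_t la = (uintptr_t)l->lock;
    uintptr_t ra = (uintptr_t)r->lock;

    if (la == ra)
        return 0;

    return la < ra ? -1 : 1;
}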
/*
 * lane_enter -- (internal) acquire a unique lane number
 */
static void
lane_enter(PMEMblkpool *pbp, unsigned *lane)
{
    unsigned mylane;

    mylane = __sync_fetch_and_add(&pbp->next_lane, 1) % pbp->nlane;

    /* lane selected, grab the per-lane lock */
    util_mutex_lock(&pbp->locks[mylane]);

    *lane = mylane;
}
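/*
 * Hedged sketch (the matching release path is not shown in this excerpt):
 * a lane_exit() counterpart would simply drop the per-lane lock taken in
 * lane_enter() above.
 */
static void
lane_exit(PMEMblkpool *pbp, unsigned mylane)
{
    util_mutex_unlock(&pbp->locks[mylane]);
}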
/**
 * Sets the log destination (stderr, syslog, or logfile).
 *
 * \param destination where to log to \ref log_dests "as defined in err.h"
 */
void err_setdest(int destination) {
    if(err_logdest == destination)
        return;

    util_mutex_lock(l_err);
    if((err_logdest & LOGDEST_LOGFILE) &&
       (!(destination & LOGDEST_LOGFILE))) {
        /* we were logging to a file, but not any more */
        io_close(err_file);
    }

    err_logdest = destination;
    util_mutex_unlock(l_err);
}
/*
 * nswrite -- (internal) write data to the namespace encapsulating the BTT
 *
 * This routine is provided to btt_init() to allow the btt module to
 * do I/O on the memory pool containing the BTT layout.
 */
static int
nswrite(void *ns, unsigned lane, const void *buf, size_t count,
    uint64_t off)
{
    struct pmemblk *pbp = (struct pmemblk *)ns;

    LOG(13, "pbp %p lane %u count %zu off %ju", pbp, lane, count, off);

    if (off + count > pbp->datasize) {
        ERR("offset + count (%zu) past end of data area (%zu)",
                off + count, pbp->datasize);
        errno = EINVAL;
        return -1;
    }

    void *dest = (char *)pbp->data + off;

#ifdef DEBUG
    /* grab debug write lock */
    util_mutex_lock(&pbp->write_lock);
#endif

    /* unprotect the memory (debug version only) */
    RANGE_RW(dest, count);

    if (pbp->is_pmem)
        pmem_memcpy_nodrain(dest, buf, count);
    else
        memcpy(dest, buf, count);

    /* protect the memory again (debug version only) */
    RANGE_RO(dest, count);

#ifdef DEBUG
    /* release debug write lock */
    util_mutex_unlock(&pbp->write_lock);
#endif

    if (pbp->is_pmem)
        pmem_drain();
    else
        pmem_msync(dest, count);

    return 0;
}
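/*
 * Hedged sketch, not verbatim from the source: the comment above says
 * nswrite() is handed to btt_init() so the BTT module can do I/O on the
 * pool.  One plausible wiring is a table of function pointers; the struct
 * and member names below (ns_write_ops, write) are illustrative assumptions,
 * not the library's actual callback API.
 */
struct ns_write_ops {
    int (*write)(void *ns, unsigned lane, const void *buf,
            size_t count, uint64_t off);
    /* read/map/sync callbacks would sit alongside this one */
};

static const struct ns_write_ops pmemblk_ns_ops = {
    .write = nswrite,
};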
/*
 * lane_hold -- grabs a per-thread lane in a round-robin fashion
 */
void
lane_hold(PMEMobjpool *pop, struct lane_section **section,
    enum lane_section_type type)
{
    ASSERTne(section, NULL);
    ASSERTne(pop->lanes, NULL);

    if (Lane_idx == UINT32_MAX) {
        do {
            Lane_idx = __sync_fetch_and_add(&Next_lane_idx, 1);
        } while (Lane_idx == UINT32_MAX); /* handles wraparound */
    }

    struct lane *lane = &pop->lanes[Lane_idx % pop->nlanes];

    util_mutex_lock(lane->lock);

    *section = &lane->sections[type];
}
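/*
 * Hedged sketch (the release path is not part of this excerpt): a matching
 * lane_release() would drop the lock of the lane selected through the
 * thread-local Lane_idx, mirroring the indexing used in lane_hold() above.
 */
void
lane_release(PMEMobjpool *pop)
{
    ASSERTne(pop->lanes, NULL);
    ASSERT(Lane_idx != UINT32_MAX);

    struct lane *lane = &pop->lanes[Lane_idx % pop->nlanes];

    util_mutex_unlock(lane->lock);
}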
/**
 * Set the debug mask.  Given a comma separated list, this walks
 * through the err_categorylist and sets the bitfields for the
 * requested log modules.
 *
 * \param list comma separated list of modules to debug.
 */
extern int err_setdebugmask(char *list) {
    unsigned int rack;
    char *token, *str, *last;
    char *tmpstr;
    int index;

    err_debugmask = 0x80000000; /* always log L_MISC! */
    str = tmpstr = strdup(list);
    if(!str)
        return 0;

    util_mutex_lock(l_err);

    while(1) {
        token = strtok_r(str, ",", &last);
        str = NULL;

        if(token) {
            rack = 1;
            index = 0;
            while((err_categorylist[index]) &&
                  (strcasecmp(err_categorylist[index], token))) {
                rack <<= 1;
                index++;
            }

            if(!err_categorylist[index]) {
                util_mutex_unlock(l_err);
                DPRINTF(E_LOG, L_MISC, "Unknown module: %s\n", token);
                free(tmpstr);
                return 1;
            } else {
                err_debugmask |= rack;
            }
        } else
            break; /* !token */
    }

    util_mutex_unlock(l_err);

    DPRINTF(E_INF, L_MISC, "Debug mask is 0x%08x\n", err_debugmask);
    free(tmpstr);

    return 0;
}
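/*
 * Hedged usage sketch: per the comment above, the argument is a
 * comma-separated list of module names matched case-insensitively against
 * err_categorylist.  The module names below are only illustrative and may
 * not exist in a given build.
 */
static void example_enable_debug(void) {
    char modules[] = "main,db,scan";

    err_setlevel(9);
    if(err_setdebugmask(modules) != 0) {
        /* a listed module was not found in err_categorylist */
    }
}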
/**
 * simple set interface to debuglevel, to avoid direct use of the global
 */
void err_setlevel(int level) {
    util_mutex_lock(l_err);
    err_debuglevel = level;
    util_mutex_unlock(l_err);
}
/**
 * Write a printf-style formatted message to the log destination.
 * This can be stderr, syslog/eventviewer, or a logfile, as determined by
 * err_setdest().  Note that this function should not be used directly;
 * rather, it should be used via the #DPRINTF macro.
 *
 * \param level level at which to log \ref log_levels
 * \param cat the category to log \ref log_categories
 * \param fmt printf-style format string
 */
void err_log(int level, unsigned int cat, char *fmt, ...) {
    va_list ap;
    char timebuf[256];
    char errbuf[4096];
    struct tm tm_now;
    time_t tt_now;
    int syslog_only = FALSE;

    if(level > 1) {
        if(level > err_debuglevel)
            return;
        if(!(cat & err_debugmask))
            return;
    } /* we'll *always* process a log level 0 or 1 */

    /*
     * skip recursive calls to logging functions to avoid deadlocks
     * (except for aborts)
     */
    util_mutex_lock(l_err_list);
    if(err_threadlist.next && __err_thread_check()) { /* skip logging */
        if(!level) {
            syslog_only = TRUE; /* syslog fatals even on recursive calls */
        } else {
            util_mutex_unlock(l_err_list);
            return;
        }
    }
    __err_thread_add();
    util_mutex_unlock(l_err_list);

    va_start(ap, fmt);
    vsnprintf(errbuf, sizeof(errbuf), fmt, ap);
    va_end(ap);

    /* always log fatals and level 1 to syslog */
    if(level <= 1) {
        if(!err_syslog_open)
            os_opensyslog();
        err_syslog_open = 1;
        //os_syslog(level,errbuf);
        if(syslog_only && !level) {
            fprintf(stderr, "Aborting\n");
            exit(-1);
        }
    }

    util_mutex_lock(l_err);

    if((err_logdest & LOGDEST_LOGFILE) && (err_file) && (!syslog_only)) {
        tt_now = time(NULL);
        localtime_r(&tt_now, &tm_now);
        snprintf(timebuf, sizeof(timebuf), "%04d-%02d-%02d %02d:%02d:%02d",
                 tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday,
                 tm_now.tm_hour, tm_now.tm_min, tm_now.tm_sec);
        io_printf(err_file, "%s (%08x): %s", timebuf,
                  __err_get_threadid(), errbuf);
        if(!level)
            io_printf(err_file, "%s: Aborting\n", timebuf);
    }

    /* always log to stderr on fatal error */
    if((err_logdest & LOGDEST_STDERR) || (!level)) {
        fprintf(stderr, "%s", errbuf);
        if(!level)
            fprintf(stderr, "Aborting\n");
    }

    util_mutex_unlock(l_err);

#ifndef ERR_LEAN
    if(level < 2) { /* only event level fatals and log level */
        plugin_event_dispatch(PLUGIN_EVENT_LOG, level, errbuf,
                              (int)strlen(errbuf) + 1);
    }
#endif

    util_mutex_lock(l_err_list);
    __err_thread_del();
    util_mutex_unlock(l_err_list);

    if(!level) {
        exit(EXIT_FAILURE); /* this should go to an OS-specific exit routine */
    }
}
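/*
 * Hedged sketch: the comment above says err_log() should be reached through
 * the DPRINTF macro rather than called directly.  A variadic forwarding
 * macro of roughly this shape would do; the real definition in err.h may
 * differ.
 */
#ifndef DPRINTF
# define DPRINTF(level, cat, ...) err_log((level), (cat), __VA_ARGS__)
#endif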
/*
 * heap_ensure_run_bucket_filled -- (internal) refills the bucket if needed
 */
static int
heap_ensure_run_bucket_filled(struct palloc_heap *heap, struct bucket *b,
    uint32_t units)
{
    ASSERTeq(b->aclass->type, CLASS_RUN);

    if (b->is_active) {
        b->c_ops->rm_all(b->container);
        b->active_memory_block.m_ops
            ->claim_revoke(&b->active_memory_block);

        b->is_active = 0;
    }

    struct heap_rt *h = heap->rt;
    struct memory_block m = MEMORY_BLOCK_NONE;

    if (recycler_get(h->recyclers[b->aclass->id], &m) == 0) {
        pthread_mutex_t *lock = m.m_ops->get_lock(&m);

        util_mutex_lock(lock);
        heap_reuse_run(heap, b, &m);
        util_mutex_unlock(lock);

        b->active_memory_block = m;
        b->is_active = 1;

        return 0;
    }

    m.size_idx = b->aclass->run.size_idx;

    /* cannot reuse an existing run, create a new one */
    struct bucket *defb = heap_get_default_bucket(heap);
    util_mutex_lock(&defb->lock);
    if (heap_get_bestfit_block(heap, defb, &m) == 0) {
        ASSERTeq(m.block_off, 0);

        heap_create_run(heap, b, &m);

        b->active_memory_block = m;
        b->is_active = 1;

        util_mutex_unlock(&defb->lock);
        return 0;
    }
    util_mutex_unlock(&defb->lock);

    /*
     * Try the recycler again, the previous call to the bestfit_block for
     * huge chunks might have reclaimed some unused runs.
     */
    if (recycler_get(h->recyclers[b->aclass->id], &m) == 0) {
        pthread_mutex_t *lock = m.m_ops->get_lock(&m);
        util_mutex_lock(lock);
        heap_reuse_run(heap, b, &m);
        util_mutex_unlock(lock);

        /*
         * To verify that the recycler run is not able to satisfy our
         * request we attempt to retrieve a block. This is not ideal,
         * and should be replaced by a different heuristic once proper
         * memory block scoring is implemented.
         */
        struct memory_block tmp = MEMORY_BLOCK_NONE;
        tmp.size_idx = units;
        if (b->c_ops->get_rm_bestfit(b->container, &tmp) != 0) {
            b->c_ops->rm_all(b->container);
            m.m_ops->claim_revoke(&m);
            return ENOMEM;
        } else {
            bucket_insert_block(b, &tmp);
        }

        b->active_memory_block = m;
        b->is_active = 1;

        return 0;
    }

    return ENOMEM;
}
/*
 * heap_reclaim_run -- checks the run for available memory if unclaimed.
 *
 * Returns 1 if the chunk was reclaimed, 0 otherwise.
 */
static int
heap_reclaim_run(struct palloc_heap *heap, struct chunk_run *run,
    struct memory_block *m)
{
    if (m->m_ops->claim(m) != 0)
        return 0; /* this run already has an owner */

    struct alloc_class *c = alloc_class_get_create_by_unit_size(
        heap->rt->alloc_classes, run->block_size);
    if (c == NULL)
        return 0;

    ASSERTeq(c->type, CLASS_RUN);

    pthread_mutex_t *lock = m->m_ops->get_lock(m);
    util_mutex_lock(lock);

    unsigned i;
    unsigned nval = c->run.bitmap_nval;
    for (i = 0; nval > 0 && i < nval - 1; ++i)
        if (run->bitmap[i] != 0)
            break;

    int empty = (i == (nval - 1)) &&
        (run->bitmap[i] == c->run.bitmap_lastval);
    if (empty) {
        struct zone *z = ZID_TO_ZONE(heap->layout, m->zone_id);
        struct chunk_header *hdr = &z->chunk_headers[m->chunk_id];
        struct bucket *defb = heap_get_default_bucket(heap);

        /*
         * The redo log ptr can be NULL if we are sure that there's only
         * one persistent value modification in the entire operation
         * context.
         */
        struct operation_context ctx;
        operation_init(&ctx, heap->base, NULL, NULL);
        ctx.p_ops = &heap->p_ops;

        struct memory_block nb = MEMORY_BLOCK_NONE;
        nb.chunk_id = m->chunk_id;
        nb.zone_id = m->zone_id;
        nb.block_off = 0;
        nb.size_idx = m->size_idx;

        heap_chunk_init(heap, hdr, CHUNK_TYPE_FREE, nb.size_idx);
        memblock_rebuild_state(heap, &nb);

        nb = heap_coalesce_huge(heap, &nb);
        nb.m_ops->prep_hdr(&nb, MEMBLOCK_FREE, &ctx);

        operation_process(&ctx);

        bucket_insert_block(defb, &nb);

        *m = nb;
    } else {
        recycler_put(heap->rt->recyclers[c->id], m);
    }

    util_mutex_unlock(lock);

    return empty;
}
/*
 * palloc_operation -- persistent memory operation. Takes a NULL pointer
 * or an existing memory block and modifies it to occupy, at least, 'size'
 * number of bytes.
 *
 * The malloc, free and realloc routines are implemented in the context of this
 * common operation which encompasses all of the functionality usually done
 * separately in those methods.
 *
 * The first thing that needs to be done is determining which memory blocks
 * will be affected by the operation - this varies depending on whether the
 * operation will need to modify or free an existing block and/or allocate
 * a new one.
 *
 * Simplified allocation process flow is as follows:
 *    - reserve a new block in the transient heap
 *    - prepare the new block
 *    - create redo log of required modifications
 *        - chunk metadata
 *        - offset of the new object
 *    - commit and process the redo log
 *
 * And similarly, the deallocation process:
 *    - create redo log of required modifications
 *        - reverse the chunk metadata back to the 'free' state
 *        - set the destination of the object offset to zero
 *    - commit and process the redo log
 *    - return the memory block back to the free blocks transient heap
 *
 * Reallocation is a combination of the above, with one additional step
 * of copying the old content in the meantime.
 */
int
palloc_operation(struct palloc_heap *heap,
    uint64_t off, uint64_t *dest_off, size_t size,
    palloc_constr constructor, void *arg,
    struct operation_context *ctx)
{
    struct bucket *b = NULL;
    struct allocation_header *alloc = NULL;
    struct memory_block existing_block = {0, 0, 0, 0};
    struct memory_block new_block = {0, 0, 0, 0};
    struct memory_block reclaimed_block = {0, 0, 0, 0};

    int ret = 0;

    /*
     * These two locks are responsible for protecting the metadata for the
     * persistent representation of a chunk. Depending on the operation and
     * the type of a chunk, they might be NULL.
     */
    pthread_mutex_t *existing_block_lock = NULL;
    pthread_mutex_t *new_block_lock = NULL;

    size_t sizeh = size + sizeof(struct allocation_header);

    /*
     * The offset of an existing block can be nonzero which means this
     * operation is either free or a realloc - either way the offset of the
     * object needs to be translated into the structure that all of the
     * heap methods operate on.
     */
    if (off != 0) {
        alloc = ALLOC_GET_HEADER(heap, off);
        existing_block = get_mblock_from_alloc(heap, alloc);

        /*
         * This lock must be held until the operation is processed
         * successfully, because other threads might operate on the
         * same bitmap value.
         */
        existing_block_lock = MEMBLOCK_OPS(AUTO, &existing_block)->
            get_lock(&existing_block, heap);
        if (existing_block_lock != NULL)
            util_mutex_lock(existing_block_lock);

#ifdef DEBUG
        if (MEMBLOCK_OPS(AUTO,
            &existing_block)->get_state(&existing_block, heap) !=
                MEMBLOCK_ALLOCATED) {
            ERR("Double free or heap corruption");
            ASSERT(0);
        }
#endif /* DEBUG */

        /*
         * The memory block must return back to the originating bucket,
         * otherwise coalescing of neighbouring blocks will be rendered
         * impossible.
         *
         * If the block was allocated in a different incarnation of the
         * heap (i.e. the application was restarted) and the chunk from
         * which the allocation comes from was not yet processed, the
         * originating bucket does not exist and all of the otherwise
         * necessary volatile heap modifications won't be performed for
         * this memory block.
         */
        b = heap_get_chunk_bucket(heap,
            alloc->chunk_id, alloc->zone_id);
    }

    /* if allocation or reallocation, reserve new memory */
    if (size != 0) {
        /* reallocation to exactly the same size, which is a no-op */
        if (alloc != NULL && alloc->size == sizeh)
            goto out;

        errno = alloc_reserve_block(heap, &new_block, sizeh);
        if (errno != 0) {
            ret = -1;
            goto out;
        }
    }

    /*
     * The offset value which is to be written to the destination pointer
     * provided by the caller.
     */
    uint64_t offset_value = 0;

    /* lock and persistently free the existing memory block */
    if (!MEMORY_BLOCK_IS_EMPTY(existing_block)) {
        /*
         * This method will insert new entries into the operation
         * context which will, after processing, update the chunk
         * metadata to 'free' - it also takes care of all the necessary
         * coalescing of blocks.
         * Even though the transient state of the heap is used during
         * this method to locate neighbouring blocks, it isn't modified.
         *
         * The rb block is the coalesced memory block that the free
         * resulted in, to prevent volatile memory leak it needs to be
         * inserted into the corresponding bucket.
         */
        reclaimed_block = heap_free_block(heap, b, existing_block, ctx);
        offset_value = 0;
    }

    if (!MEMORY_BLOCK_IS_EMPTY(new_block)) {
        if (alloc_prep_block(heap, new_block, constructor,
            arg, &offset_value) != 0) {
            /*
             * Constructor returned non-zero value which means
             * the memory block reservation has to be rolled back.
             */
            struct bucket *new_bucket = heap_get_chunk_bucket(heap,
                new_block.chunk_id, new_block.zone_id);
            ASSERTne(new_bucket, NULL);

            /*
             * Omitting the context in this method results in
             * coalescing of blocks without affecting the persistent
             * heap state.
             */
            new_block = heap_free_block(heap, new_bucket,
                new_block, NULL);
            CNT_OP(new_bucket, insert, heap, new_block);

            if (new_bucket->type == BUCKET_RUN)
                heap_degrade_run_if_empty(heap,
                    new_bucket, new_block);

            errno = ECANCELED;
            ret = -1;
            goto out;
        }

        /*
         * This lock must be held for the duration between the creation
         * of the allocation metadata updates in the operation context
         * and the operation processing. This is because a different
         * thread might operate on the same 8-byte value of the run
         * bitmap and override allocation performed by this thread.
         */
        new_block_lock = MEMBLOCK_OPS(AUTO, &new_block)->
            get_lock(&new_block, heap);

        /* the locks might be identical in the case of realloc */
        if (new_block_lock == existing_block_lock)
            new_block_lock = NULL;

        if (new_block_lock != NULL)
            util_mutex_lock(new_block_lock);

#ifdef DEBUG
        if (MEMBLOCK_OPS(AUTO,
            &new_block)->get_state(&new_block, heap) !=
                MEMBLOCK_FREE) {
            ERR("Double free or heap corruption");
            ASSERT(0);
        }
#endif /* DEBUG */

        /*
         * The actual required metadata modifications are chunk-type
         * dependent, but it always is a modification of a single 8 byte
         * value - either modification of few bits in a bitmap or
         * changing a chunk type from free to used.
         */
        MEMBLOCK_OPS(AUTO, &new_block)->prep_hdr(&new_block,
            heap, MEMBLOCK_ALLOCATED, ctx);
    }

    /* not in-place realloc */
    if (!MEMORY_BLOCK_IS_EMPTY(existing_block) &&
        !MEMORY_BLOCK_IS_EMPTY(new_block)) {
        size_t old_size = alloc->size;
        size_t to_cpy = old_size > sizeh ? sizeh : old_size;
        VALGRIND_ADD_TO_TX(
            PMALLOC_OFF_TO_PTR(heap, offset_value),
            to_cpy - ALLOC_OFF);
        pmemops_memcpy_persist(&heap->p_ops,
            PMALLOC_OFF_TO_PTR(heap, offset_value),
            PMALLOC_OFF_TO_PTR(heap, off),
            to_cpy - ALLOC_OFF);
        VALGRIND_REMOVE_FROM_TX(
            PMALLOC_OFF_TO_PTR(heap, offset_value),
            to_cpy - ALLOC_OFF);
    }

    /*
     * If the caller provided a destination value to update, it needs to be
     * modified atomically alongside the heap metadata, and so the operation
     * context must be used.
     * The actual offset value depends on the operation type.
     */
    if (dest_off != NULL)
        operation_add_entry(ctx, dest_off, offset_value, OPERATION_SET);

    operation_process(ctx);

    /*
     * After the operation succeeded, the persistent state is all in order
     * but in some cases it might not be in-sync with its transient
     * representation.
     */
    if (!MEMORY_BLOCK_IS_EMPTY(existing_block)) {
        VALGRIND_DO_MEMPOOL_FREE(heap->layout,
            (char *)heap_get_block_data(heap, existing_block)
            + ALLOC_OFF);

        /* we might have been operating on inactive run */
        if (b != NULL) {
            /*
             * Even though the initial condition is to check
             * whether the existing block exists it's important to
             * use the 'reclaimed block' - it is the coalesced one
             * and reflects the current persistent heap state,
             * whereas the existing block reflects the state from
             * before this operation started.
             */
            CNT_OP(b, insert, heap, reclaimed_block);

            /*
             * Degrading of a run means turning it back into a chunk
             * in case it's no longer needed.
             * It might be tempting to defer this operation until
             * such time that the chunk is actually needed, but
             * right now the decision is to keep the persistent heap
             * state as clean as possible - and that means not
             * leaving unused data around.
             */
            if (b->type == BUCKET_RUN)
                heap_degrade_run_if_empty(heap, b,
                    reclaimed_block);
        }
    }

out:
    if (new_block_lock != NULL)
        util_mutex_unlock(new_block_lock);

    if (existing_block_lock != NULL)
        util_mutex_unlock(existing_block_lock);

    return ret;
}
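/*
 * Hedged sketch, derived only from the header comment of palloc_operation():
 * malloc- and free-style entry points can be expressed as thin wrappers
 * around the common operation.  The wrapper names below are illustrative,
 * not necessarily the ones the library actually exposes.
 */
static int
example_palloc(struct palloc_heap *heap, uint64_t *off, size_t size,
    struct operation_context *ctx)
{
    /* off == 0 means there is no existing block: a pure allocation */
    return palloc_operation(heap, 0, off, size, NULL, NULL, ctx);
}

static int
example_pfree(struct palloc_heap *heap, uint64_t *off,
    struct operation_context *ctx)
{
    /* size == 0 means the existing block is only freed, nothing is reserved */
    return palloc_operation(heap, *off, off, 0, NULL, NULL, ctx);
}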