void hp_pkg_free(struct hp_block *hpb, void *p)
{
	struct hp_frag *f, *next;

	if (!p) {
		LM_WARN("free(0) called\n");
		return;
	}

	f = FRAG_OF(p);

	/*
	 * for private memory, coalesce as many consecutive fragments as possible.
	 * The same operation is not performed for shared memory, because:
	 *  - it would incur performance penalties due to the additional locking logic
	 *  - the allocator itself actually favours fragmentation and reuse
	 */
	for (;;) {
		next = FRAG_NEXT(f);

		if (next >= hpb->last_frag || !next->prev)
			break;

		hp_frag_detach(hpb, next);
		update_stats_pkg_frag_detach(hpb, next);

		f->size += next->size + FRAG_OVERHEAD;
		update_stats_pkg_frag_merge(hpb);
	}

	hp_frag_attach(hpb, f);
	update_stats_pkg_frag_attach(hpb, f);
}
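/*
 * Illustrative sketch (not part of the allocator, kept under "#if 0" so it is
 * compiled out): a minimal, standalone model of the forward-coalescing loop in
 * hp_pkg_free() above. The toy_frag structure, coalesce_forward() and
 * TOY_OVERHEAD are hypothetical names invented for the example.
 */
#if 0
#include <stdio.h>

#define TOY_OVERHEAD 16
#define TOY_COUNT    5

struct toy_frag {
	unsigned long size;
	int is_free;        /* stands in for the "next->prev" membership test */
};

/* merge every free fragment that immediately follows frags[i] into frags[i] */
static void coalesce_forward(struct toy_frag *frags, int i, int count)
{
	int j;

	for (j = i + 1; j < count && frags[j].is_free; j++) {
		frags[i].size += frags[j].size + TOY_OVERHEAD;
		frags[j].size = 0;
		frags[j].is_free = 0;
	}
}

int main(void)
{
	struct toy_frag frags[TOY_COUNT] = {
		{64, 1}, {32, 1}, {128, 1}, {256, 0}, {512, 1}
	};

	coalesce_forward(frags, 0, TOY_COUNT);

	/* expected: 64 + (32 + 16) + (128 + 16) = 256; the in-use frag stops the merge */
	printf("merged size: %lu\n", frags[0].size);
	return 0;
}
#endif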
void *hp_pkg_malloc(struct hp_block *hpb, unsigned long size)
{
	struct hp_frag *frag;
	unsigned int hash;

	/* size must be a multiple of ROUNDTO */
	size = ROUNDUP(size);

	/* search for a suitable free frag */
	for (hash = GET_HASH(size); hash < HP_HASH_SIZE; hash++) {
		frag = hpb->free_hash[hash].first;
		for (; frag; frag = frag->u.nxt_free)
			if (frag->size >= size)
				goto found;

		/* try in a bigger bucket */
	}

	/* out of memory... we have to shut down */
	LM_CRIT("not enough memory, please increase the \"-M\" parameter!\n");
	abort();

found:
	hp_frag_detach(hpb, frag);
	update_stats_pkg_frag_detach(hpb, frag);

	/* split the fragment if possible */
	pkg_frag_split(hpb, frag, size);

	if (hpb->real_used > hpb->max_real_used)
		hpb->max_real_used = hpb->real_used;

	pkg_threshold_check();

	return (char *)frag + sizeof *frag;
}
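/*
 * Illustrative sketch (compiled out via "#if 0"): a standalone model of the
 * "round the size up, then first-fit from the matching bucket upwards"
 * strategy used by hp_pkg_malloc(). TOY_ROUNDTO and the toy_* helpers are
 * assumptions made for the example; the real ROUNDUP / GET_HASH macros live
 * in hp_malloc.h and may differ.
 */
#if 0
#include <stdio.h>

#define TOY_ROUNDTO   8UL
#define TOY_HASH_SIZE 32

/* round a request up to the next multiple of TOY_ROUNDTO */
static unsigned long toy_roundup(unsigned long size)
{
	return (size + TOY_ROUNDTO - 1) & ~(TOY_ROUNDTO - 1);
}

/* linear bucketing: bucket i holds fragments of exactly i * TOY_ROUNDTO bytes */
static unsigned int toy_get_hash(unsigned long size)
{
	return (unsigned int)(size / TOY_ROUNDTO);
}

int main(void)
{
	unsigned long sizes[] = { 1, 8, 13, 100 };
	unsigned int i;

	for (i = 0; i < sizeof sizes / sizeof *sizes; i++) {
		unsigned long rounded = toy_roundup(sizes[i]);

		/* a real allocation would scan buckets toy_get_hash(rounded)..MAX */
		printf("request %3lu -> rounded %3lu -> start bucket %u\n",
		       sizes[i], rounded, toy_get_hash(rounded));
	}

	return 0;
}
#endif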
void *hp_pkg_realloc(struct hp_block *hpb, void *p, unsigned long size)
{
	struct hp_frag *f;
	unsigned long diff;
	unsigned long orig_size;
	struct hp_frag *next;
	void *ptr;

	if (size == 0) {
		if (p)
			hp_pkg_free(hpb, p);

		return NULL;
	}

	if (!p)
		return hp_pkg_malloc(hpb, size);

	f = FRAG_OF(p);

	size = ROUNDUP(size);
	orig_size = f->size;

	/* shrink operation */
	if (orig_size > size) {
		pkg_frag_split(hpb, f, size);

	/* grow operation */
	} else if (orig_size < size) {
		diff = size - orig_size;
		next = FRAG_NEXT(f);

		/* try to join with a large enough adjacent free fragment */
		if (next < hpb->last_frag && next->prev &&
		    (next->size + FRAG_OVERHEAD) >= diff) {

			hp_frag_detach(hpb, next);
			update_stats_pkg_frag_detach(hpb, next);

			f->size += next->size + FRAG_OVERHEAD;

			/* split the result if necessary */
			if (f->size > size)
				pkg_frag_split(hpb, f, size);

		} else {
			/* could not join => realloc */
			ptr = hp_pkg_malloc(hpb, size);
			if (ptr) {
				/* copy the old contents (needed e.g. by libssl) */
				memcpy(ptr, p, orig_size);
				hp_pkg_free(hpb, p);
			}

			p = ptr;
		}

		if (hpb->real_used > hpb->max_real_used)
			hpb->max_real_used = hpb->real_used;
	}

	pkg_threshold_check();

	return p;
}
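/*
 * Illustrative sketch (compiled out via "#if 0"): a standalone walk-through of
 * the decision taken by hp_pkg_realloc() above -- shrink in place, grow in
 * place by absorbing the adjacent free fragment (when its size plus the
 * fragment header covers the missing bytes), or fall back to malloc + memcpy
 * + free. All toy_* names and TOY_OVERHEAD are hypothetical.
 */
#if 0
#include <stdio.h>

#define TOY_OVERHEAD 16UL

enum toy_action { TOY_SHRINK, TOY_NOOP, TOY_GROW_IN_PLACE, TOY_MOVE };

static enum toy_action toy_realloc_action(unsigned long cur_size,
		unsigned long new_size, int next_is_free, unsigned long next_size)
{
	if (new_size < cur_size)
		return TOY_SHRINK;
	if (new_size == cur_size)
		return TOY_NOOP;

	/* grow: the missing bytes must fit into the adjacent free fragment,
	 * whose header (TOY_OVERHEAD) is swallowed by the merge as well */
	if (next_is_free && next_size + TOY_OVERHEAD >= new_size - cur_size)
		return TOY_GROW_IN_PLACE;

	return TOY_MOVE; /* malloc + memcpy + free */
}

int main(void)
{
	printf("%d\n", toy_realloc_action(64, 32, 0, 0));   /* TOY_SHRINK */
	printf("%d\n", toy_realloc_action(64, 96, 1, 32));  /* TOY_GROW_IN_PLACE */
	printf("%d\n", toy_realloc_action(64, 256, 1, 32)); /* TOY_MOVE */
	return 0;
}
#endif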
/*
 * Note: as opposed to hp_shm_malloc_unsafe(),
 *       hp_shm_malloc() assumes that the core statistics are initialized
 */
void *hp_shm_malloc(struct hp_block *hpb, unsigned long size)
{
	struct hp_frag *frag;
	unsigned int init_hash, hash, sec_hash;
	int i;

	/* size must be a multiple of ROUNDTO */
	size = ROUNDUP(size);

	/* search for a suitable free frag */
	for (hash = GET_HASH(size), init_hash = hash; hash < HP_HASH_SIZE; hash++) {
		if (!hpb->free_hash[hash].is_optimized) {
			SHM_LOCK(hash);
			frag = hpb->free_hash[hash].first;

			for (; frag; frag = frag->u.nxt_free)
				if (frag->size >= size)
					goto found;

			SHM_UNLOCK(hash);
		} else {
			/* optimized size. search through its own hash! */
			for (i = 0, sec_hash = HP_HASH_SIZE +
			                       hash * shm_secondary_hash_size +
			                       optimized_get_indexes[hash];
			     i < shm_secondary_hash_size;
			     i++, sec_hash = (sec_hash + 1) % shm_secondary_hash_size) {

				SHM_LOCK(sec_hash);
				frag = hpb->free_hash[sec_hash].first;
				for (; frag; frag = frag->u.nxt_free)
					if (frag->size >= size) {
						/* free fragments are detached in a simple round-robin manner */
						optimized_get_indexes[hash] =
						    (optimized_get_indexes[hash] + i + 1)
						     % shm_secondary_hash_size;

						hash = sec_hash;
						goto found;
					}

				SHM_UNLOCK(sec_hash);
			}
		}

		/* try in a bigger bucket */
	}

	/* out of memory... we have to shut down */
	LM_CRIT("not enough shared memory, please increase the \"-m\" parameter!\n");
	abort();

found:
	hp_frag_detach(hpb, frag);

	/* split the fragment if possible */
	shm_frag_split(hpb, frag, size, hash);

	SHM_UNLOCK(hash);

	update_stats_shm_frag_detach(frag);

#ifndef HP_MALLOC_FAST_STATS
	unsigned long real_used;

	real_used = get_stat_val(shm_rused);
	if (real_used > hpb->max_real_used)
		hpb->max_real_used = real_used;
#endif

	/* ignore concurrency issues, simply obtaining an estimate is enough */
	mem_hash_usage[init_hash]++;

	return (char *)frag + sizeof *frag;
}
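/*
 * Illustrative sketch (compiled out via "#if 0"): a standalone model of the
 * round-robin scan over an optimized bucket's secondary hash slots, as done by
 * hp_shm_malloc() above -- start from the per-bucket saved index, probe the N
 * slots circularly, and on a hit advance the saved index just past the slot
 * that served the request. The toy_* names, N and the slot contents are
 * assumptions made for the example.
 */
#if 0
#include <stdio.h>

#define N 8 /* stands in for shm_secondary_hash_size */

static unsigned int saved_index;            /* stands in for optimized_get_indexes[hash] */
static int slot_has_frag[N] = { 0, 0, 1, 0, 1, 0, 0, 1 };

/* returns the secondary slot that satisfied the request, or -1 if none */
static int toy_pick_slot(void)
{
	unsigned int i, slot;

	for (i = 0; i < N; i++) {
		slot = (saved_index + i) % N;
		if (slot_has_frag[slot]) {
			/* the next allocation of this size starts searching one slot further */
			saved_index = (saved_index + i + 1) % N;
			return (int)slot;
		}
	}

	return -1;
}

int main(void)
{
	int k;

	/* three picks rotate over the populated slots: 2, 4, 7 */
	for (k = 0; k < 3; k++)
		printf("pick %d -> slot %d (next start: %u)\n", k, toy_pick_slot(),
		       saved_index);

	return 0;
}
#endif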
/**
 * on-demand memory fragmentation, based on an input pattern file
 */
int hp_mem_warming(struct hp_block *hpb)
{
	struct size_fraction {
		int hash_index;

		double amount;
		unsigned long fragments;

		struct size_fraction *next;
	};

	struct size_fraction *sf, *it, *sorted_sf = NULL;
	FILE *f;
	size_t rc;
	unsigned long roundto, hash_size;
	long long bucket_mem;
	int i, c = 0;
	unsigned int current_frag_size;
	struct hp_frag *big_frag;
	unsigned int optimized_buckets;

	f = fopen(mem_warming_pattern_file, "r");
	if (!f) {
		LM_ERR("failed to open pattern file %s: %d - %s\n",
		       mem_warming_pattern_file, errno, strerror(errno));
		return -1;
	}

	rc = fscanf(f, "%lu %lu\n", &roundto, &hash_size);
	if (rc != 2) {
		LM_ERR("failed to read from %s: bad file format\n",
		       mem_warming_pattern_file);
		goto out;
	}
	rc = 0;

	if (roundto != ROUNDTO || hash_size != HP_HASH_SIZE) {
		LM_ERR("incompatible pattern file data: [HP_HASH_SIZE: %lu-%lu] "
		       "[ROUNDTO: %lu-%lu]\n", hash_size, HP_HASH_SIZE, roundto, ROUNDTO);
		rc = -1;
		goto out;
	}

	/* read bucket usage percentages and sort them by number of fragments */
	for (i = 0; i < HP_LINEAR_HASH_SIZE; i++) {

		sf = malloc(sizeof *sf);
		if (!sf) {
			LM_INFO("malloc failed, skipping shm warming\n");
			rc = -1;
			goto out_free;
		}

		sf->hash_index = i;
		sf->next = NULL;

		if (fscanf(f, "%lf", &sf->amount) != 1) {
			LM_CRIT("%s appears to be corrupt. Please remove it first\n",
			        mem_warming_pattern_file);
			abort();
		}

		if (i == 0)
			sf->fragments = 0;
		else
			sf->fragments = sf->amount * hpb->size / (ROUNDTO * i);

		if (!sorted_sf)
			sorted_sf = sf;
		else {
			for (it = sorted_sf;
			     it->next && it->next->fragments > sf->fragments;
			     it = it->next)
				;

			if (it->fragments < sf->fragments) {
				sf->next = sorted_sf;
				sorted_sf = sf;
			} else {
				sf->next = it->next;
				it->next = sf;
			}
		}
	}

	/* only optimize the configured number of buckets */
	optimized_buckets = (float)shm_hash_split_percentage / 100 * HP_LINEAR_HASH_SIZE;

	LM_INFO("Optimizing %u / %lu mem buckets\n", optimized_buckets,
	        HP_LINEAR_HASH_SIZE);

	sf = sorted_sf;
	for (i = 0; i < optimized_buckets; i++) {
		hpb->free_hash[sf->hash_index].is_optimized = 1;
		sf = sf->next;
	}

	big_frag = hpb->first_frag;

	/* populate each free hash bucket with the proper number of fragments */
	for (sf = sorted_sf; sf; sf = sf->next) {
		LM_INFO("[%d][%s] fraction: %.12lf total mem: %llu, %lu\n", sf->hash_index,
		        hpb->free_hash[sf->hash_index].is_optimized ? "X" : " ",
		        sf->amount,
		        (unsigned long long)(sf->amount * hpb->size *
		                             mem_warming_percentage / 100),
		        ROUNDTO * sf->hash_index);

		current_frag_size = ROUNDTO * sf->hash_index;
		bucket_mem = sf->amount * hpb->size * mem_warming_percentage / 100;

		/* create free fragments worth 'bucket_mem' of memory */
		while (bucket_mem >= FRAG_OVERHEAD + current_frag_size) {
			hp_frag_detach(hpb, big_frag);
			if (stats_are_ready())
				update_stats_shm_frag_detach(big_frag);
			else {
				hpb->used += big_frag->size;
				hpb->real_used += big_frag->size + FRAG_OVERHEAD;
			}

			/* trim-insert operation on the big free fragment */
			shm_frag_split_unsafe(hpb, big_frag, current_frag_size);

			/*
			 * "big_frag" now points to a smaller, free and detached frag.
			 *
			 * With optimized buckets, inserts will be automagically
			 * balanced within their dedicated hashes
			 */
			hp_frag_attach(hpb, big_frag);
			if (stats_are_ready())
				update_stats_shm_frag_attach(big_frag);
			else {
				hpb->used -= big_frag->size;
				hpb->real_used -= big_frag->size + FRAG_OVERHEAD;
			}

			big_frag = FRAG_NEXT(big_frag);

			bucket_mem -= FRAG_OVERHEAD + current_frag_size;

			if (c % 1000000 == 0)
				LM_INFO("%d| %lld %p\n", c, bucket_mem, big_frag);

			c++;
		}
	}

out_free:
	while (sorted_sf) {
		sf = sorted_sf;
		sorted_sf = sorted_sf->next;
		free(sf);
	}

out:
	fclose(f);
	return rc;
}
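/*
 * Illustrative sketch (compiled out via "#if 0"): writes a file in the layout
 * hp_mem_warming() parses above -- "ROUNDTO HP_HASH_SIZE" on the first line,
 * then one floating-point usage fraction per linear hash bucket. The TOY_*
 * constants and the uniform distribution are assumptions made for the
 * example, not values taken from a real build or a running instance; the
 * reader rejects the file unless the header matches its own ROUNDTO and
 * HP_HASH_SIZE.
 */
#if 0
#include <stdio.h>

#define TOY_ROUNDTO          8UL
#define TOY_HASH_SIZE        40UL /* must match HP_HASH_SIZE of the reader */
#define TOY_LINEAR_HASH_SIZE 32   /* must match HP_LINEAR_HASH_SIZE of the reader */

int main(void)
{
	FILE *f = fopen("mem_warming_pattern.example", "w");
	int i;

	if (!f) {
		perror("fopen");
		return 1;
	}

	/* header line: checked against ROUNDTO / HP_HASH_SIZE by the reader */
	fprintf(f, "%lu %lu\n", TOY_ROUNDTO, TOY_HASH_SIZE);

	/* one usage fraction per linear bucket; here simply uniform */
	for (i = 0; i < TOY_LINEAR_HASH_SIZE; i++)
		fprintf(f, "%.12lf\n", 1.0 / TOY_LINEAR_HASH_SIZE);

	fclose(f);
	return 0;
}
#endif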
/*
 * although there is a lot of duplicate code, we get the best performance:
 *
 * - the _unsafe version will not be used too much anyway (usually at startup)
 * - hp_shm_malloc is faster (no 3rd parameter, no extra if blocks)
 */
void *hp_shm_malloc_unsafe(struct hp_block *qm, unsigned long size)
{
	struct hp_frag *frag;
	unsigned int init_hash, hash, sec_hash;
	int i;

	/* size must be a multiple of ROUNDTO */
	size = ROUNDUP(size);

	/* search for a suitable free frag */
	for (hash = GET_HASH(size), init_hash = hash; hash < HP_HASH_SIZE; hash++) {
		if (!qm->free_hash[hash].is_optimized) {
			frag = qm->free_hash[hash].first;

			for (; frag; frag = frag->u.nxt_free)
				if (frag->size >= size)
					goto found;
		} else {
			/* optimized size. search through its own hash! */
			for (i = 0, sec_hash = HP_HASH_SIZE +
			                       hash * shm_secondary_hash_size +
			                       optimized_get_indexes[hash];
			     i < shm_secondary_hash_size;
			     i++, sec_hash = (sec_hash + 1) % shm_secondary_hash_size) {

				frag = qm->free_hash[sec_hash].first;
				for (; frag; frag = frag->u.nxt_free)
					if (frag->size >= size) {
						/* free fragments are detached in a simple round-robin manner */
						optimized_get_indexes[hash] =
						    (optimized_get_indexes[hash] + i + 1)
						     % shm_secondary_hash_size;

						hash = sec_hash;
						goto found;
					}
			}
		}

		/* try in a bigger bucket */
	}

	/* out of memory... we have to shut down */
	LM_CRIT("not enough shared memory, please increase the \"-m\" parameter!\n");
	abort();

found:
	hp_frag_detach(qm, frag);
	if (stats_are_ready())
		update_stats_shm_frag_detach(frag);
	else {
		qm->used += frag->size;
		qm->real_used += frag->size + FRAG_OVERHEAD;
	}

	/* split the fragment if possible */
	shm_frag_split_unsafe(qm, frag, size);

#ifndef HP_MALLOC_FAST_STATS
	if (stats_are_ready()) {
		unsigned long real_used;

		real_used = get_stat_val(shm_rused);
		if (real_used > qm->max_real_used)
			qm->max_real_used = real_used;
	} else if (qm->real_used > qm->max_real_used)
		qm->max_real_used = qm->real_used;
#endif

	if (mem_hash_usage)
		mem_hash_usage[init_hash]++;

	return (char *)frag + sizeof *frag;
}