/*
 * Allocate `size` bytes from the nursery fragment list.
 * The size is aligned up here; on failure of alloc_from_fragment the whole
 * scan restarts from the list head (presumably because a concurrent
 * allocator won the fragment — confirm against alloc_from_fragment).
 * Returns NULL when no fragment can satisfy the request.
 */
void*
mono_sgen_nursery_alloc (size_t size)
{
	Fragment *frag;
	DEBUG (4, fprintf (gc_debug_file, "Searching nursery for size: %zd\n", size));
	size = SGEN_ALIGN_UP (size);
	HEAVY_STAT (InterlockedIncrement (&stat_nursery_alloc_requests));
#ifdef NALLOC_DEBUG
	InterlockedIncrement (&alloc_count);
#endif

restart:
	/* The fragment list pointers are masked; unmask before dereferencing. */
	for (frag = unmask (nursery_fragments); frag; frag = unmask (frag->next)) {
		HEAVY_STAT (InterlockedIncrement (&stat_alloc_iterations));

		if (size <= (frag->fragment_end - frag->fragment_next)) {
			void *p = alloc_from_fragment (frag, size);
			if (!p) {
				/* Allocation from this fragment failed; rescan from the head. */
				HEAVY_STAT (InterlockedIncrement (&stat_alloc_retries));
				goto restart;
			}
#ifdef NALLOC_DEBUG
			add_alloc_record (p, size, FIXED_ALLOC);
#endif
			return p;
		}
	}
	return NULL;
}
/*
 * size is already rounded up and we hold the GC lock.
 *
 * Degraded-mode allocation directly into the major heap: scan the existing
 * section list for one with enough room, otherwise allocate a fresh major
 * section. The vtable is installed as the object's first word.
 */
static void*
major_alloc_degraded (MonoVTable *vtable, size_t size)
{
	GCMemSection *section;
	void **p = NULL;
	g_assert (size <= SGEN_MAX_SMALL_OBJ_SIZE);
	HEAVY_STAT (++stat_objects_alloced_degraded);
	HEAVY_STAT (stat_bytes_alloced_degraded += size);
	/* First fit over the existing sections. */
	for (section = section_list; section; section = section->block.next) {
		if ((section->end_data - section->next_data) >= size) {
			p = (void**)section->next_data;
			break;
		}
	}
	if (!p) {
		/* No section had room: grow the major heap by one section. */
		section = alloc_major_section ();
		section->is_to_space = FALSE;
		/* FIXME: handle OOM */
		p = (void**)section->next_data;
		sgen_register_major_sections_alloced (1);
	}
	section->next_data += size;
	DEBUG (3, fprintf (gc_debug_file, "Allocated (degraded) object %p, vtable: %p (%s), size: %zd in section %p\n", p, vtable, vtable->klass->name, size, section));
	*p = vtable;
	return p;
}
/*
 * Parallel (multi-threaded) fixed-size allocation from a fragment allocator.
 * Walks the masked fragment list; if par_alloc_from_fragment fails (lost a
 * race with another allocator), the scan restarts from the head.
 * Returns NULL when no fragment is large enough.
 */
void*
sgen_fragment_allocator_par_alloc (SgenFragmentAllocator *allocator, size_t size)
{
	SgenFragment *frag;

#ifdef NALLOC_DEBUG
	InterlockedIncrement (&alloc_count);
#endif

restart:
	for (frag = (SgenFragment *)unmask (allocator->alloc_head); unmask (frag); frag = (SgenFragment *)unmask (frag->next)) {
		HEAVY_STAT (++stat_alloc_iterations);

		if (size <= (size_t)(frag->fragment_end - frag->fragment_next)) {
			void *p = par_alloc_from_fragment (allocator, frag, size);
			if (!p) {
				/* Fragment was claimed concurrently; retry from the list head. */
				HEAVY_STAT (++stat_alloc_retries);
				goto restart;
			}
#ifdef NALLOC_DEBUG
			add_alloc_record (p, size, FIXED_ALLOC);
#endif
			return p;
		}
	}
	return NULL;
}
/*
 * Copy-or-mark for a major collection. *ptr is updated in place:
 *  - nursery objects are evacuated (unless forwarded/pinned) and their
 *    destination's mark bit set;
 *  - small major-heap objects are marked and enqueued;
 *  - large objects are pinned and pushed on the gray queue.
 */
static void
major_copy_or_mark_object (void **ptr, SgenGrayQueue *queue)
{
	void *obj = *ptr;
	MSBlockInfo *block;

	HEAVY_STAT (++stat_copy_object_called_major);

	DEBUG (9, g_assert (obj));
	DEBUG (9, g_assert (current_collection_generation == GENERATION_OLD));

	if (ptr_in_nursery (obj)) {
		int word, bit;
		char *forwarded;

		/* Already copied: just update the reference to the new location. */
		if ((forwarded = SGEN_OBJECT_IS_FORWARDED (obj))) {
			*ptr = forwarded;
			return;
		}
		/* Pinned nursery objects stay where they are. */
		if (SGEN_OBJECT_IS_PINNED (obj))
			return;

		HEAVY_STAT (++stat_objects_copied_major);

		obj = copy_object_no_checks (obj, queue);
		*ptr = obj;

		/*
		 * FIXME: See comment for copy_object_no_checks(). If
		 * we have that, we can let the allocation function
		 * give us the block info, too, and we won't have to
		 * re-fetch it.
		 */
		block = MS_BLOCK_FOR_OBJ (obj);
		MS_CALC_MARK_BIT (word, bit, obj);
		DEBUG (9, g_assert (!MS_MARK_BIT (block, word, bit)));
		MS_SET_MARK_BIT (block, word, bit);
	} else {
		/* Object is in the major heap or LOS; decide by size/heap membership. */
#ifdef FIXED_HEAP
		if (MS_PTR_IN_SMALL_MAJOR_HEAP (obj))
#else
		mword objsize;

		objsize = SGEN_ALIGN_UP (mono_sgen_safe_object_get_size ((MonoObject*)obj));

		if (objsize <= SGEN_MAX_SMALL_OBJ_SIZE)
#endif
		{
			block = MS_BLOCK_FOR_OBJ (obj);
			MS_MARK_OBJECT_AND_ENQUEUE (obj, block, queue);
		} else {
			/* Large object: pin instead of copying. */
			if (SGEN_OBJECT_IS_PINNED (obj))
				return;
			binary_protocol_pin (obj, (gpointer)SGEN_LOAD_VTABLE (obj), mono_sgen_safe_object_get_size ((MonoObject*)obj));
			SGEN_PIN_OBJECT (obj);
			/* FIXME: only enqueue if object has references */
			GRAY_OBJECT_ENQUEUE (queue, obj);
		}
	}
}
/*
 * Check whether a given remset location was recently added to the global
 * remset, using a 2-entry LRU cache of recently seen locations.
 *
 * It's hand-coded instead of done using loops to reduce the number of
 * memory references on a cache hit.
 *
 * Returns FALSE on a cache hit (the location was already added) and TRUE
 * on a miss, in which case the location is inserted into the cache and the
 * caller should add it to the global remset.
 */
static gboolean
global_remset_location_was_not_added (gpointer ptr)
{
	gpointer first = global_remset_cache [0], second;
	if (first == ptr) {
		HEAVY_STAT (++stat_global_remsets_discarded);
		return FALSE;
	}

	second = global_remset_cache [1];

	if (second == ptr) {
		/* Move the second entry to the front (LRU order). */
		global_remset_cache [0] = second;
		global_remset_cache [1] = first;

		HEAVY_STAT (++stat_global_remsets_discarded);
		return FALSE;
	}

	/* Miss: evict the oldest entry and record ptr as most recent. */
	global_remset_cache [0] = second;
	global_remset_cache [1] = ptr;
	return TRUE;
}
/*
 * size is already rounded up and we hold the GC lock.
 *
 * Degraded-mode allocation via the block allocator. Any major sections
 * created as a side effect of alloc_obj are accounted for by comparing
 * num_major_sections before and after.
 */
static void*
major_alloc_degraded (MonoVTable *vtable, size_t size)
{
	void *obj;
	int old_num_sections = num_major_sections;
	obj = alloc_obj (size, FALSE, vtable->klass->has_references);
	/* NOTE(review): no NULL check on obj before the vtable store — presumably
	 * alloc_obj aborts on OOM here; confirm. */
	*(MonoVTable**)obj = vtable;
	HEAVY_STAT (++stat_objects_alloced_degraded);
	HEAVY_STAT (stat_bytes_alloced_degraded += size);
	g_assert (num_major_sections >= old_num_sections);
	mono_sgen_register_major_sections_alloced (num_major_sections - old_num_sections);
	return obj;
}
/*
 * Generic write barrier (no-store variant): record `ptr` in the per-thread
 * store remset buffer, taking the GC lock. The buffer is flushed via
 * evacuate_remset_buffer() when full.
 */
static void
mono_sgen_ssb_wbarrier_generic_nostore (gpointer ptr)
{
	gpointer *buffer;
	int index;
	TLAB_ACCESS_INIT;

	LOCK_GC;

	buffer = STORE_REMSET_BUFFER;
	index = STORE_REMSET_BUFFER_INDEX;
	/* This simple optimization eliminates a sizable portion of entries.
	   Comparing it to the last but one entry as well doesn't eliminate
	   significantly more entries. */
	if (buffer [index] == ptr) {
		UNLOCK_GC;
		return;
	}

	HEAVY_STAT (++stat_wbarrier_generic_store_remset);

	/* Index is pre-incremented: it points at the last stored entry. */
	++index;
	if (index >= STORE_REMSET_BUFFER_SIZE) {
		/* Buffer full: flush it, then start over from slot 1. */
		evacuate_remset_buffer ();
		index = STORE_REMSET_BUFFER_INDEX;
		g_assert (index == 0);
		++index;
	}
	buffer [index] = ptr;
	STORE_REMSET_BUFFER_INDEX = index;

	UNLOCK_GC;
}
/*
 * Slow-path enqueue of (obj, desc) onto the gray queue. Allocates a new
 * queue section when the current one is full or the queue is empty.
 */
void
sgen_gray_object_enqueue (SgenGrayQueue *queue, GCObject *obj, SgenDescriptor desc)
{
	GrayQueueEntry entry = SGEN_GRAY_QUEUE_ENTRY (obj, desc);

	HEAVY_STAT (stat_gray_queue_enqueue_slow_path ++);

	SGEN_ASSERT (9, obj, "enqueueing a null object");
	//sgen_check_objref (obj);

#ifdef SGEN_CHECK_GRAY_OBJECT_ENQUEUE
	if (queue->enqueue_check_func)
		queue->enqueue_check_func (obj);
#endif

	if (G_UNLIKELY (!queue->first || queue->cursor == GRAY_LAST_CURSOR_POSITION (queue->first))) {
		if (queue->first) {
			/* Set the current section size back to default, might have been changed by sgen_gray_object_dequeue_section */
			queue->first->size = SGEN_GRAY_QUEUE_SECTION_SIZE;
		}

		sgen_gray_object_alloc_queue_section (queue);
	}
	STATE_ASSERT (queue->first, GRAY_QUEUE_SECTION_STATE_ENQUEUED);
	SGEN_ASSERT (9, queue->cursor <= GRAY_LAST_CURSOR_POSITION (queue->first), "gray queue %p overflow, first %p, cursor %p", queue, queue->first, queue->cursor);
	/* Pre-increment: cursor points at the last valid entry. */
	*++queue->cursor = entry;
#ifdef SGEN_HEAVY_BINARY_PROTOCOL
	binary_protocol_gray_enqueue (queue, queue->cursor, obj);
#endif
}
/*
 * Push a fresh section onto the front of the gray queue, recycling one from
 * the free list when available. Resets the section size to the default and
 * positions the cursor just before the first entry slot.
 */
void
sgen_gray_object_alloc_queue_section (SgenGrayQueue *queue)
{
	GrayQueueSection *new_section;

	HEAVY_STAT (stat_gray_queue_section_alloc ++);

	if (queue->alloc_prepare_func)
		queue->alloc_prepare_func (queue);

	if (!queue->free_list) {
		/* Nothing to recycle: allocate a brand new section. */
		new_section = (GrayQueueSection *)sgen_alloc_internal (INTERNAL_MEM_GRAY_QUEUE);
		STATE_SET (new_section, GRAY_QUEUE_SECTION_STATE_FLOATING);
	} else {
		/* Pop a previously allocated section off the free list. */
		new_section = queue->free_list;
		queue->free_list = new_section->next;
		STATE_TRANSITION (new_section, GRAY_QUEUE_SECTION_STATE_FREE_LIST, GRAY_QUEUE_SECTION_STATE_FLOATING);
	}

	new_section->size = SGEN_GRAY_QUEUE_SECTION_SIZE;

	STATE_TRANSITION (new_section, GRAY_QUEUE_SECTION_STATE_FLOATING, GRAY_QUEUE_SECTION_STATE_ENQUEUED);

	/* Link the section in at the head of the queue. */
	new_section->next = queue->first;
	queue->first = new_section;
	queue->cursor = new_section->entries - 1;
}
/*
 * Slow-path dequeue from the gray queue. Returns an entry with obj == NULL
 * when the queue is empty. When the current section is drained, it is moved
 * to the free list and the cursor repositioned to the end of the next one.
 */
GrayQueueEntry
sgen_gray_object_dequeue (SgenGrayQueue *queue)
{
	GrayQueueEntry entry;

	HEAVY_STAT (stat_gray_queue_dequeue_slow_path ++);

	if (sgen_gray_object_queue_is_empty (queue)) {
		entry.obj = NULL;
		return entry;
	}

	STATE_ASSERT (queue->first, GRAY_QUEUE_SECTION_STATE_ENQUEUED);
	SGEN_ASSERT (9, queue->cursor >= GRAY_FIRST_CURSOR_POSITION (queue->first), "gray queue %p underflow", queue);

	entry = *queue->cursor--;

#ifdef SGEN_HEAVY_BINARY_PROTOCOL
	binary_protocol_gray_dequeue (queue, queue->cursor + 1, entry.obj);
#endif

	if (G_UNLIKELY (queue->cursor < GRAY_FIRST_CURSOR_POSITION (queue->first))) {
		/* Section exhausted: recycle it onto the free list. */
		GrayQueueSection *section = queue->first;
		queue->first = section->next;
		section->next = queue->free_list;
		STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_ENQUEUED, GRAY_QUEUE_SECTION_STATE_FREE_LIST);
		queue->free_list = section;
		queue->cursor = queue->first ? queue->first->entries + queue->first->size - 1 : NULL;
	}

	return entry;
}
/* used for the GC-internal data structures */
/*
 * Allocate pinned memory. Requests larger than the biggest freelist slot
 * get their own OS allocation with a LargePinnedMemHeader prefix; smaller
 * ones come from the per-size freelists of `alc`.
 */
void*
sgen_alloc_pinned (SgenPinnedAllocator *alc, size_t size)
{
	int slot;
	void *res = NULL;

	HEAVY_STAT (++stat_pinned_alloc);

	if (size > freelist_sizes [SGEN_PINNED_FREELIST_NUM_SLOTS - 1]) {
		LargePinnedMemHeader *mh;

		/* Reserve room for the header in front of the returned data. */
		size += sizeof (LargePinnedMemHeader);
		mh = sgen_alloc_os_memory (size, TRUE);
		mh->magic = LARGE_PINNED_MEM_HEADER_MAGIC;
		mh->size = size;
		/* FIXME: do a CAS here */
		large_pinned_bytes_alloced += size;
		return mh->data;
	}

	slot = slot_for_size (size);
	g_assert (size <= freelist_sizes [slot]);

	res = alloc_from_slot (alc, slot);

	return res;
}
/*
 * Slow-path enqueue of (obj, desc) onto the gray queue; `is_parallel` is
 * forwarded to section allocation so the section counter is kept atomic
 * when multiple workers share the queue.
 */
void
sgen_gray_object_enqueue (SgenGrayQueue *queue, GCObject *obj, SgenDescriptor desc, gboolean is_parallel)
{
	GrayQueueEntry entry = SGEN_GRAY_QUEUE_ENTRY (obj, desc);

	HEAVY_STAT (stat_gray_queue_enqueue_slow_path ++);

	SGEN_ASSERT (9, obj, "enqueueing a null object");
	//sgen_check_objref (obj);

#ifdef SGEN_CHECK_GRAY_OBJECT_ENQUEUE
	if (queue->enqueue_check_func)
		queue->enqueue_check_func (obj);
#endif

	if (G_UNLIKELY (!queue->first || queue->cursor == GRAY_LAST_CURSOR_POSITION (queue->first))) {
		if (queue->first) {
			/*
			 * We don't actively update the section size with each push/pop. For the first
			 * section we determine the size from the cursor position. For the reset of the
			 * sections we need to have the size set.
			 */
			queue->first->size = SGEN_GRAY_QUEUE_SECTION_SIZE;
		}

		sgen_gray_object_alloc_queue_section (queue, is_parallel);
	}
	STATE_ASSERT (queue->first, GRAY_QUEUE_SECTION_STATE_ENQUEUED);
	SGEN_ASSERT (9, queue->cursor <= GRAY_LAST_CURSOR_POSITION (queue->first), "gray queue %p overflow, first %p, cursor %p", queue, queue->first, queue->cursor);
	/* Pre-increment: cursor points at the last valid entry. */
	*++queue->cursor = entry;
#ifdef SGEN_HEAVY_BINARY_PROTOCOL
	binary_protocol_gray_enqueue (queue, queue->cursor, obj);
#endif
}
/*
 * We found a fragment of free memory in the nursery: memzero it and if
 * it is big enough, add it to the list of fragments that can be used for
 * allocation.
 *
 * Fragments below SGEN_MAX_NURSERY_WASTE are not worth tracking; they are
 * cleared and counted as waste instead.
 */
static void
add_nursery_frag (size_t frag_size, char* frag_start, char* frag_end)
{
	DEBUG (4, fprintf (gc_debug_file, "Found empty fragment: %p-%p, size: %zd\n", frag_start, frag_end, frag_size));
	binary_protocol_empty (frag_start, frag_size);
	/* Not worth dealing with smaller fragments: need to tune */
	if (frag_size >= SGEN_MAX_NURSERY_WASTE) {
		/* memsetting just the first chunk start is bound to provide better cache locality */
		if (mono_sgen_get_nursery_clear_policy () == CLEAR_AT_GC)
			memset (frag_start, 0, frag_size);

#ifdef NALLOC_DEBUG
		/* XXX convert this into a flight record entry
		printf ("\tfragment [%p %p] size %zd\n", frag_start, frag_end, frag_size);
		*/
#endif
		add_fragment (frag_start, frag_end);
		fragment_total += frag_size;
	} else {
		/* Clear unused fragments, pinning depends on this */
		/*TODO place an int[] here instead of the memset if size justify it*/
		memset (frag_start, 0, frag_size);
		HEAVY_STAT (InterlockedExchangeAdd (&stat_wasted_bytes_small_areas, frag_size));
	}
}
/*
 * We found a fragment of free memory in the nursery: memzero it and if
 * it is big enough, add it to the list of fragments that can be used for
 * allocation.
 *
 * Fragments below SGEN_MAX_NURSERY_WASTE are not worth tracking; they are
 * cleared and counted as waste instead.
 */
static void
add_nursery_frag (SgenFragmentAllocator *allocator, size_t frag_size, char* frag_start, char* frag_end)
{
	SGEN_LOG (4, "Found empty fragment: %p-%p, size: %zd", frag_start, frag_end, frag_size);
	binary_protocol_empty (frag_start, frag_size);
	/* Not worth dealing with smaller fragments: need to tune */
	if (frag_size >= SGEN_MAX_NURSERY_WASTE) {
		/* memsetting just the first chunk start is bound to provide better cache locality */
		if (sgen_get_nursery_clear_policy () == CLEAR_AT_GC)
			memset (frag_start, 0, frag_size);
		else if (sgen_get_nursery_clear_policy () == CLEAR_AT_TLAB_CREATION_DEBUG)
			/* Debug policy fills with a poison pattern instead of zeroes. */
			memset (frag_start, 0xff, frag_size);

#ifdef NALLOC_DEBUG
		/* XXX convert this into a flight record entry
		printf ("\tfragment [%p %p] size %zd\n", frag_start, frag_end, frag_size);
		*/
#endif
		sgen_fragment_allocator_add (allocator, frag_start, frag_end);
		fragment_total += frag_size;
	} else {
		/* Clear unused fragments, pinning depends on this */
		sgen_clear_range (frag_start, frag_end);
		HEAVY_STAT (stat_wasted_bytes_small_areas += frag_size);
	}
}
/*
 * Serial (single-threaded) fixed-size allocation from a fragment allocator.
 * Walks the fragment list keeping a pointer to the link we came through so
 * the fragment can be unlinked by the helper. Returns NULL on failure.
 */
void*
sgen_fragment_allocator_serial_alloc (SgenFragmentAllocator *allocator, size_t size)
{
	SgenFragment *cur;
	SgenFragment **link = &allocator->alloc_head;

#ifdef NALLOC_DEBUG
	InterlockedIncrement (&alloc_count);
#endif

	while ((cur = *link)) {
		char *result = (char *)serial_alloc_from_fragment (link, cur, size);

		HEAVY_STAT (++stat_alloc_iterations);
		if (result) {
#ifdef NALLOC_DEBUG
			add_alloc_record (result, size, FIXED_ALLOC);
#endif
			return result;
		}
		link = &cur->next;
	}
	return NULL;
}
/*
 * Return a floating gray queue section to the internal allocator.
 * The section must be in the FLOATING state (neither enqueued nor on a
 * free list); the transition to FREED is checked before releasing it.
 */
void
sgen_gray_object_free_queue_section (GrayQueueSection *section)
{
	STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_FLOATING, GRAY_QUEUE_SECTION_STATE_FREED);

	HEAVY_STAT (stat_gray_queue_section_free ++);

	sgen_free_internal (section, INTERNAL_MEM_GRAY_QUEUE);
}
/*
 * Allocate a byte range from the nursery: try for `desired_size` bytes but
 * accept anything down to `minimum_size`. The size actually obtained is
 * written to *out_alloc_size. Returns NULL when nothing fits.
 */
void*
sgen_nursery_alloc_range (size_t desired_size, size_t minimum_size, size_t *out_alloc_size)
{
	void *region;

	SGEN_LOG (4, "Searching for byte range desired size: %zd minimum size %zd", desired_size, minimum_size);

	HEAVY_STAT (++stat_nursery_alloc_range_requests);

	/* Delegate to the parallel range allocator over the mutator fragment list. */
	region = sgen_fragment_allocator_par_range_alloc (&mutator_allocator, desired_size, minimum_size, out_alloc_size);
	return region;
}
/*
 * Slow-path dequeue with optional parallel semantics. Returns an entry
 * with obj == NULL when the queue is empty. When the current section
 * drains, it is recycled onto the free list; in parallel mode the section
 * count is decremented atomically and the steal mutex is held while the
 * last section is unlinked, so a stealing thread cannot observe a torn
 * list.
 */
GrayQueueEntry
sgen_gray_object_dequeue (SgenGrayQueue *queue, gboolean is_parallel)
{
	GrayQueueEntry entry;

	HEAVY_STAT (stat_gray_queue_dequeue_slow_path ++);

	if (sgen_gray_object_queue_is_empty (queue)) {
		entry.obj = NULL;
		return entry;
	}

	STATE_ASSERT (queue->first, GRAY_QUEUE_SECTION_STATE_ENQUEUED);
	SGEN_ASSERT (9, queue->cursor >= GRAY_FIRST_CURSOR_POSITION (queue->first), "gray queue %p underflow", queue);

	entry = *queue->cursor--;

#ifdef SGEN_HEAVY_BINARY_PROTOCOL
	binary_protocol_gray_dequeue (queue, queue->cursor + 1, entry.obj);
#endif

	if (G_UNLIKELY (queue->cursor < GRAY_FIRST_CURSOR_POSITION (queue->first))) {
		GrayQueueSection *section;
		gint32 old_num_sections = 0;

		if (is_parallel)
			old_num_sections = mono_atomic_dec_i32 (&queue->num_sections);
		else
			queue->num_sections--;

		/* Removing the last section races with stealers: serialize via the mutex. */
		if (is_parallel && old_num_sections <= 0) {
			mono_os_mutex_lock (&queue->steal_mutex);
		}

		section = queue->first;
		queue->first = section->next;
		if (queue->first) {
			queue->first->prev = NULL;
		} else {
			queue->last = NULL;
			SGEN_ASSERT (0, !old_num_sections, "Why do we have an inconsistent number of sections ?");
		}
		section->next = queue->free_list;
		STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_ENQUEUED, GRAY_QUEUE_SECTION_STATE_FREE_LIST);
		queue->free_list = section;
		queue->cursor = queue->first ? queue->first->entries + queue->first->size - 1 : NULL;

		if (is_parallel && old_num_sections <= 0) {
			mono_os_mutex_unlock (&queue->steal_mutex);
		}
	}

	return entry;
}
/*
 * size is already rounded up and we hold the GC lock.
 *
 * Degraded-mode allocation that first waits for any in-progress sweep to
 * finish, then allocates via alloc_obj and accounts for any major sections
 * created as a side effect. Returns NULL when alloc_obj fails.
 */
static void*
major_alloc_degraded (MonoVTable *vtable, size_t size)
{
	void *obj;
	int old_num_sections;

	ms_wait_for_sweep_done ();

	old_num_sections = num_major_sections;

	obj = alloc_obj (size, FALSE, SGEN_VTABLE_HAS_REFERENCES (vtable));
	if (G_LIKELY (obj)) {
		*(MonoVTable**)obj = vtable;
		HEAVY_STAT (++stat_objects_alloced_degraded);
		HEAVY_STAT (stat_bytes_alloced_degraded += size);
		g_assert (num_major_sections >= old_num_sections);
		mono_sgen_register_major_sections_alloced (num_major_sections - old_num_sections);
	}
	return obj;
}
/*
 * Allocate a small object of `size` bytes from the nursery. The size is
 * validated, aligned up, and the request forwarded to the parallel
 * fragment allocator over the mutator fragment list.
 */
void*
sgen_nursery_alloc (size_t size)
{
	size_t aligned_size;

	SGEN_ASSERT (1, size >= sizeof (MonoObject) && size <= SGEN_MAX_SMALL_OBJ_SIZE, "Invalid nursery object size");

	SGEN_LOG (4, "Searching nursery for size: %zd", size);

	aligned_size = SGEN_ALIGN_UP (size);

	HEAVY_STAT (InterlockedIncrement (&stat_nursery_alloc_requests));

	return sgen_fragment_allocator_par_alloc (&mutator_allocator, aligned_size);
}
/*
 * Allocate a small object of `size` bytes from the nursery. The bounds
 * check allows for the extra canary bytes appended in debug configurations;
 * the size is aligned up and the request forwarded to the parallel
 * fragment allocator over the mutator fragment list.
 */
void*
sgen_nursery_alloc (size_t size)
{
	size_t aligned_size;

	SGEN_ASSERT (1, size >= (SGEN_CLIENT_MINIMUM_OBJECT_SIZE + CANARY_SIZE) && size <= (SGEN_MAX_SMALL_OBJ_SIZE + CANARY_SIZE), "Invalid nursery object size");

	SGEN_LOG (4, "Searching nursery for size: %zd", size);

	aligned_size = SGEN_ALIGN_UP (size);

	HEAVY_STAT (++stat_nursery_alloc_requests);

	return sgen_fragment_allocator_par_alloc (&mutator_allocator, aligned_size);
}
/*
 * Record a major-heap location `ptr` that points into the nursery in the
 * global remset. Duplicates are filtered through the 2-entry LRU cache;
 * the global remset grows by chaining new RememberedSet blocks when full.
 * The global remset lock is only taken during parallel collections.
 */
static void
sgen_ssb_record_pointer (gpointer ptr)
{
	RememberedSet *rs;
	gboolean lock = sgen_collection_is_parallel ();
	gpointer obj = *(gpointer*)ptr;

	/* The location itself must be outside the nursery, its target inside. */
	g_assert (!sgen_ptr_in_nursery (ptr) && sgen_ptr_in_nursery (obj));

	if (lock)
		LOCK_GLOBAL_REMSET;

	if (!global_remset_location_was_not_added (ptr))
		goto done;

	if (G_UNLIKELY (do_pin_stats))
		sgen_pin_stats_register_global_remset (obj);

	SGEN_LOG (8, "Adding global remset for %p", ptr);
	binary_protocol_global_remset (ptr, *(gpointer*)ptr, (gpointer)SGEN_LOAD_VTABLE (obj));

	HEAVY_STAT (++stat_global_remsets_added);

	/*
	 * FIXME: If an object remains pinned, we need to add it at every minor collection.
	 * To avoid uncontrolled growth of the global remset, only add each pointer once.
	 */
	if (global_remset->store_next + 3 < global_remset->end_set) {
		*(global_remset->store_next++) = (mword)ptr;
		goto done;
	}
	/* Current block full: chain a new RememberedSet of the same capacity. */
	rs = sgen_alloc_remset (global_remset->end_set - global_remset->data, NULL, TRUE);
	rs->next = global_remset;
	global_remset = rs;
	*(global_remset->store_next++) = (mword)ptr;

#if SGEN_MAX_DEBUG_LEVEL >= 4
	{
		int global_rs_size = 0;

		for (rs = global_remset; rs; rs = rs->next) {
			global_rs_size += rs->store_next - rs->data;
		}
		SGEN_LOG (4, "Global remset now has size %d", global_rs_size);
	}
#endif

done:
	if (lock)
		UNLOCK_GLOBAL_REMSET;
}
/*
 * Serial range allocation: try to find a fragment of at least
 * `desired_size` bytes; failing that, fall back to the largest fragment
 * that still satisfies `minimum_size`. The size actually allocated is
 * written to *out_alloc_size. Returns NULL when nothing fits.
 */
void*
sgen_fragment_allocator_serial_range_alloc (SgenFragmentAllocator *allocator, size_t desired_size, size_t minimum_size, size_t *out_alloc_size)
{
	SgenFragment *frag, **previous, *min_frag = NULL, **prev_min_frag = NULL;
	size_t current_minimum = minimum_size;

#ifdef NALLOC_DEBUG
	InterlockedIncrement (&alloc_count);
#endif

	previous = &allocator->alloc_head;

	for (frag = *previous; frag; frag = *previous) {
		size_t frag_size = frag->fragment_end - frag->fragment_next;

		HEAVY_STAT (++stat_alloc_range_iterations);

		if (desired_size <= frag_size) {
			void *p;
			*out_alloc_size = desired_size;

			p = serial_alloc_from_fragment (previous, frag, desired_size);
#ifdef NALLOC_DEBUG
			add_alloc_record (p, desired_size, RANGE_ALLOC);
#endif
			return p;
		}
		/* Track the largest acceptable fragment seen so far as a fallback. */
		if (current_minimum <= frag_size) {
			min_frag = frag;
			prev_min_frag = previous;
			current_minimum = frag_size;
		}
		previous = &frag->next;
	}

	if (min_frag) {
		/* Second chance: consume the best (largest) fallback fragment whole. */
		void *p;
		size_t frag_size = min_frag->fragment_end - min_frag->fragment_next;
		*out_alloc_size = frag_size;

		p = serial_alloc_from_fragment (prev_min_frag, min_frag, frag_size);

#ifdef NALLOC_DEBUG
		add_alloc_record (p, frag_size, RANGE_ALLOC);
#endif
		return p;
	}

	return NULL;
}
/*
 * Push a fresh (empty) section onto the front of the gray queue, recycling
 * one from the free list when possible. Maintains the doubly-linked
 * first/last pointers; in parallel mode a write barrier is issued and the
 * section count updated atomically so a stealing thread sees a consistent
 * list.
 */
void
sgen_gray_object_alloc_queue_section (SgenGrayQueue *queue, gboolean is_parallel)
{
	GrayQueueSection *section;

	if (queue->free_list) {
		/* Use the previously allocated queue sections if possible */
		section = queue->free_list;
		queue->free_list = section->next;
		STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_FREE_LIST, GRAY_QUEUE_SECTION_STATE_FLOATING);
	} else {
		HEAVY_STAT (stat_gray_queue_section_alloc ++);

		/* Allocate a new section */
		section = (GrayQueueSection *)sgen_alloc_internal (INTERNAL_MEM_GRAY_QUEUE);
		STATE_SET (section, GRAY_QUEUE_SECTION_STATE_FLOATING);
	}

	/* Section is empty */
	section->size = 0;

	STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_FLOATING, GRAY_QUEUE_SECTION_STATE_ENQUEUED);

	/* Link it with the others */
	section->next = queue->first;
	section->prev = NULL;
	if (queue->first)
		queue->first->prev = section;
	else
		queue->last = section;
	queue->first = section;
	queue->cursor = section->entries - 1;

	if (is_parallel) {
		mono_memory_write_barrier ();
		/*
		 * FIXME
		 * we could probably optimize the code to only rely on the write barrier
		 * for synchronization with the stealer thread. Additionally we could also
		 * do a write barrier once every other gray queue change, and request
		 * to have a minimum of sections before stealing, to keep consistency.
		 */
		mono_atomic_inc_i32 (&queue->num_sections);
	} else {
		queue->num_sections++;
	}
}
/*
 * Scan all global remset blocks, processing each recorded location with
 * handle_remset. The remset is compacted in place while scanning: only
 * locations that still point into the nursery are kept (written back via
 * store_pos), so stale entries don't accumulate across major collections.
 */
static void
mono_sgen_ssb_begin_scan_remsets (void *start_nursery, void *end_nursery, SgenGrayQueue *queue)
{
	RememberedSet *remset;
	mword *p, *next_p, *store_pos;

	/* the global one */
	for (remset = global_remset; remset; remset = remset->next) {
		DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %td\n", remset->data, remset->store_next, remset->store_next - remset->data));
		store_pos = remset->data;
		for (p = remset->data; p < remset->store_next; p = next_p) {
			void **ptr = (void**)p [0];

			/*Ignore previously processed remset.*/
			if (!global_remset_location_was_not_added (ptr)) {
				next_p = p + 1;
				continue;
			}

			next_p = handle_remset (p, start_nursery, end_nursery, TRUE, queue);

			/*
			 * Clear global remsets of locations which no longer point to the
			 * nursery. Otherwise, they could grow indefinitely between major
			 * collections.
			 *
			 * Since all global remsets are location remsets, we don't need to unmask the pointer.
			 */
			if (mono_sgen_ptr_in_nursery (*ptr)) {
				*store_pos ++ = p [0];
				HEAVY_STAT (++stat_global_remsets_readded);
			}
		}

		/* Truncate the remset */
		remset->store_next = store_pos;
	}
}
/*
 * Lock-free allocation of `size` bytes from a single fragment: bump
 * fragment_next with a CAS. Returns NULL if the fragment is too small or
 * the CAS loses a race. When the fragment drops below
 * SGEN_MAX_NURSERY_WASTE, it is retired: its tail is claimed and cleared,
 * and the fragment is unlinked from the list using Michael's lock-free
 * list removal (mark the next pointer, then swing the predecessor).
 */
static void*
par_alloc_from_fragment (SgenFragmentAllocator *allocator, SgenFragment *frag, size_t size)
{
	char *p = frag->fragment_next;
	char *end = p + size;

	if (end > frag->fragment_end)
		return NULL;

	/* p = frag->fragment_next must happen before */
	mono_memory_barrier ();

	if (InterlockedCompareExchangePointer ((volatile gpointer*)&frag->fragment_next, end, p) != p)
		return NULL;

	if (frag->fragment_end - end < SGEN_MAX_NURSERY_WASTE) {
		SgenFragment *next, **prev_ptr;

		/*
		 * Before we clean the remaining nursery, we must claim the remaining space
		 * as it could end up been used by the range allocator since it can end up
		 * allocating from this dying fragment as it doesn't respect SGEN_MAX_NURSERY_WASTE
		 * when doing second chance allocation.
		 */
		if ((sgen_get_nursery_clear_policy () == CLEAR_AT_TLAB_CREATION || sgen_get_nursery_clear_policy () == CLEAR_AT_TLAB_CREATION_DEBUG) && claim_remaining_size (frag, end)) {
			sgen_clear_range (end, frag->fragment_end);
			HEAVY_STAT (stat_wasted_bytes_trailer += frag->fragment_end - end);
#ifdef NALLOC_DEBUG
			add_alloc_record (end, frag->fragment_end - end, BLOCK_ZEROING);
#endif
		}

		prev_ptr = find_previous_pointer_fragment (allocator, frag);

		/*Use Michaels linked list remove*/

		/*prev_ptr will be null if the fragment was removed concurrently */
		while (prev_ptr) {
			next = frag->next;

			/*already deleted*/
			if (!get_mark (next)) {
				/*frag->next read must happen before the first CAS*/
				mono_memory_write_barrier ();

				/*Fail if the next node is removed concurrently and its CAS wins */
				if (InterlockedCompareExchangePointer ((volatile gpointer*)&frag->next, mask (next, 1), next) != next) {
					continue;
				}
			}

			/* The second CAS must happen after the first CAS or frag->next. */
			mono_memory_write_barrier ();

			/* Fail if the previous node was deleted and its CAS wins */
			if (InterlockedCompareExchangePointer ((volatile gpointer*)prev_ptr, unmask (next), frag) != frag) {
				prev_ptr = find_previous_pointer_fragment (allocator, frag);
				continue;
			}
			break;
		}
	}

	return p;
}
/*
 * Provide a variant that takes just the vtable for small fixed-size objects.
 * The aligned size is already computed and stored in vt->gc_descr.
 * Note: every SGEN_SCAN_START_SIZE or so we are given the chance to do some special
 * processing. We can keep track of where objects start, for example,
 * so when we scan the thread stacks for pinned objects, we can start
 * a search for the pinned object in SGEN_SCAN_START_SIZE chunks.
 *
 * Allocation strategy: LOS for objects above SGEN_MAX_SMALL_OBJ_SIZE;
 * otherwise a TLAB bump allocation with slow paths for TLAB refill,
 * direct nursery allocation and degraded-mode fallback.
 */
static void*
mono_gc_alloc_obj_nolock (MonoVTable *vtable, size_t size)
{
	/* FIXME: handle OOM */
	void **p;
	char *new_next;
	TLAB_ACCESS_INIT;

	HEAVY_STAT (++stat_objects_alloced);
	if (size <= SGEN_MAX_SMALL_OBJ_SIZE)
		HEAVY_STAT (stat_bytes_alloced += size);
	else
		HEAVY_STAT (stat_bytes_alloced_los += size);

	size = ALIGN_UP (size);

	g_assert (vtable->gc_descr);

	if (G_UNLIKELY (has_per_allocation_action)) {
		/* Debug hooks: forced collections / heap verification every N allocations. */
		static int alloc_count;
		int current_alloc = InterlockedIncrement (&alloc_count);

		if (collect_before_allocs) {
			if (((current_alloc % collect_before_allocs) == 0) && nursery_section) {
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered");
				if (!degraded_mode && sgen_can_alloc_size (size) && size <= SGEN_MAX_SMALL_OBJ_SIZE) {
					// FIXME:
					g_assert_not_reached ();
				}
			}
		} else if (verify_before_allocs) {
			if ((current_alloc % verify_before_allocs) == 0)
				sgen_check_whole_heap_stw ();
		}
	}

	/*
	 * We must already have the lock here instead of after the
	 * fast path because we might be interrupted in the fast path
	 * (after confirming that new_next < TLAB_TEMP_END) by the GC,
	 * and we'll end up allocating an object in a fragment which
	 * no longer belongs to us.
	 *
	 * The managed allocator does not do this, but it's treated
	 * specially by the world-stopping code.
	 */
	if (size > SGEN_MAX_SMALL_OBJ_SIZE) {
		p = sgen_los_alloc_large_inner (vtable, size);
	} else {
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;
		TLAB_NEXT = new_next;

		if (G_LIKELY (new_next < TLAB_TEMP_END)) {
			/* Fast path */

			/*
			 * FIXME: We might need a memory barrier here so the change to tlab_next is
			 * visible before the vtable store.
			 */
			DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
			binary_protocol_alloc (p , vtable, size);
			if (G_UNLIKELY (MONO_GC_NURSERY_OBJ_ALLOC_ENABLED ()))
				MONO_GC_NURSERY_OBJ_ALLOC ((mword)p, size, vtable->klass->name_space, vtable->klass->name);
			g_assert (*p == NULL);
			mono_atomic_store_seq (p, vtable);

			return p;
		}

		/* Slow path */

		/* there are two cases: the object is too big or we run out of space in the TLAB */
		/* we also reach here when the thread does its first allocation after a minor
		 * collection, since the tlab_ variables are initialized to NULL.
		 * there can be another case (from ORP), if we cooperate with the runtime a bit:
		 * objects that need finalizers can have the high bit set in their size
		 * so the above check fails and we can readily add the object to the queue.
		 * This avoids taking again the GC lock when registering, but this is moot when
		 * doing thread-local allocation, so it may not be a good idea.
		 */
		if (TLAB_NEXT >= TLAB_REAL_END) {
			int available_in_tlab;
			/*
			 * Run out of space in the TLAB. When this happens, some amount of space
			 * remains in the TLAB, but not enough to satisfy the current allocation
			 * request. Currently, we retire the TLAB in all cases, later we could
			 * keep it if the remaining space is above a treshold, and satisfy the
			 * allocation directly from the nursery.
			 */
			TLAB_NEXT -= size;
			/* when running in degraded mode, we continue allocing that way
			 * for a while, to decrease the number of useless nursery collections.
			 */
			if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE)
				return alloc_degraded (vtable, size, FALSE);

			available_in_tlab = TLAB_REAL_END - TLAB_NEXT;
			if (size > tlab_size || available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
				/* Allocate directly from the nursery */
				do {
					p = sgen_nursery_alloc (size);
					if (!p) {
						sgen_ensure_free_space (size);
						if (degraded_mode)
							return alloc_degraded (vtable, size, FALSE);
						else
							p = sgen_nursery_alloc (size);
					}
				} while (!p);
				if (!p) {
					// no space left
					g_assert (0);
				}

				if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
					memset (p, 0, size);
				}
			} else {
				/* Retire the current TLAB and grab a new one from the nursery. */
				size_t alloc_size = 0;
				if (TLAB_START)
					DEBUG (3, fprintf (gc_debug_file, "Retire TLAB: %p-%p [%ld]\n", TLAB_START, TLAB_REAL_END, (long)(TLAB_REAL_END - TLAB_NEXT - size)));
				sgen_nursery_retire_region (p, available_in_tlab);

				do {
					p = sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
					if (!p) {
						sgen_ensure_free_space (tlab_size);
						if (degraded_mode)
							return alloc_degraded (vtable, size, FALSE);
						else
							p = sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
					}
				} while (!p);

				if (!p) {
					// no space left
					g_assert (0);
				}

				/* Allocate a new TLAB from the current nursery fragment */
				TLAB_START = (char*)p;
				TLAB_NEXT = TLAB_START;
				TLAB_REAL_END = TLAB_START + alloc_size;
				TLAB_TEMP_END = TLAB_START + MIN (SGEN_SCAN_START_SIZE, alloc_size);

				if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
					memset (TLAB_START, 0, alloc_size);
				}

				/* Allocate from the TLAB */
				p = (void*)TLAB_NEXT;
				TLAB_NEXT += size;
				sgen_set_nursery_scan_start ((char*)p);
			}
		} else {
			/* Reached tlab_temp_end */

			/* record the scan start so we can find pinned objects more easily */
			sgen_set_nursery_scan_start ((char*)p);
			/* we just bump tlab_temp_end as well */
			TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
			DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", TLAB_NEXT, TLAB_TEMP_END));
		}
	}

	if (G_LIKELY (p)) {
		DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
		binary_protocol_alloc (p, vtable, size);
		if (G_UNLIKELY (MONO_GC_MAJOR_OBJ_ALLOC_LARGE_ENABLED ()|| MONO_GC_NURSERY_OBJ_ALLOC_ENABLED ())) {
			if (size > SGEN_MAX_SMALL_OBJ_SIZE)
				MONO_GC_MAJOR_OBJ_ALLOC_LARGE ((mword)p, size, vtable->klass->name_space, vtable->klass->name);
			else
				MONO_GC_NURSERY_OBJ_ALLOC ((mword)p, size, vtable->klass->name_space, vtable->klass->name);
		}
		mono_atomic_store_seq (p, vtable);
	}

	return p;
}
/*
 * Non-blocking allocation attempt: like mono_gc_alloc_obj_nolock but never
 * triggers a collection or falls back to degraded mode — every slow path
 * that would need free space simply returns NULL. Large objects (above
 * SGEN_MAX_SMALL_OBJ_SIZE) are rejected outright.
 */
static void*
mono_gc_try_alloc_obj_nolock (MonoVTable *vtable, size_t size)
{
	void **p;
	char *new_next;
	TLAB_ACCESS_INIT;

	size = ALIGN_UP (size);

	g_assert (vtable->gc_descr);
	if (size > SGEN_MAX_SMALL_OBJ_SIZE)
		return NULL;

	if (G_UNLIKELY (size > tlab_size)) {
		/* Allocate directly from the nursery */
		p = sgen_nursery_alloc (size);
		if (!p)
			return NULL;
		sgen_set_nursery_scan_start ((char*)p);

		/*FIXME we should use weak memory ops here. Should help specially on x86. */
		if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
			memset (p, 0, size);
	} else {
		int available_in_tlab;
		char *real_end;
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;

		real_end = TLAB_REAL_END;
		available_in_tlab = real_end - (char*)p;

		if (G_LIKELY (new_next < real_end)) {
			/* Fast path: bump the TLAB pointer. */
			TLAB_NEXT = new_next;

			/* Second case, we overflowed temp end */
			if (G_UNLIKELY (new_next >= TLAB_TEMP_END)) {
				sgen_set_nursery_scan_start (new_next);
				/* we just bump tlab_temp_end as well */
				TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
				DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", TLAB_NEXT, TLAB_TEMP_END));
			}
		} else if (available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
			/* Allocate directly from the nursery */
			p = sgen_nursery_alloc (size);
			if (!p)
				return NULL;

			if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
				memset (p, 0, size);
		} else {
			/* Retire the current TLAB and try to get a fresh one from the nursery. */
			size_t alloc_size = 0;

			sgen_nursery_retire_region (p, available_in_tlab);
			new_next = sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
			p = (void**)new_next;
			if (!p)
				return NULL;

			TLAB_START = (char*)new_next;
			TLAB_NEXT = new_next + size;
			TLAB_REAL_END = new_next + alloc_size;
			TLAB_TEMP_END = new_next + MIN (SGEN_SCAN_START_SIZE, alloc_size);
			sgen_set_nursery_scan_start ((char*)p);

			if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
				memset (new_next, 0, alloc_size);

			MONO_GC_NURSERY_TLAB_ALLOC ((mword)new_next, alloc_size);
		}
	}

	HEAVY_STAT (++stat_objects_alloced);
	HEAVY_STAT (stat_bytes_alloced += size);

	DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
	binary_protocol_alloc (p, vtable, size);
	if (G_UNLIKELY (MONO_GC_NURSERY_OBJ_ALLOC_ENABLED ()))
		MONO_GC_NURSERY_OBJ_ALLOC ((mword)p, size, vtable->klass->name_space, vtable->klass->name);
	g_assert (*p == NULL); /* FIXME disable this in non debug builds */

	mono_atomic_store_seq (p, vtable);

	return p;
}
/*** Nursery memory allocation ***/

/*
 * Account for a nursery region (e.g. the tail of a retired TLAB) that is
 * being discarded. Pure bookkeeping: the bytes are counted as wasted and
 * the region itself is not touched.
 */
void
sgen_nursery_retire_region (void *address, ptrdiff_t size)
{
	HEAVY_STAT (stat_wasted_bytes_discarded_fragments += size);
}
/*
 * Parallel range allocation: try to find a fragment of at least
 * `desired_size` bytes; failing that, consume the largest fragment that
 * still satisfies `minimum_size` (second-chance allocation). The size
 * actually allocated is written to *out_alloc_size. Any lost race causes
 * a full restart of the scan. Returns NULL when nothing fits.
 */
void*
sgen_fragment_allocator_par_range_alloc (SgenFragmentAllocator *allocator, size_t desired_size, size_t minimum_size, size_t *out_alloc_size)
{
	SgenFragment *frag, *min_frag;
	size_t current_minimum;

restart:
	min_frag = NULL;
	current_minimum = minimum_size;

#ifdef NALLOC_DEBUG
	InterlockedIncrement (&alloc_count);
#endif

	for (frag = (SgenFragment *)unmask (allocator->alloc_head); frag; frag = (SgenFragment *)unmask (frag->next)) {
		size_t frag_size = frag->fragment_end - frag->fragment_next;

		HEAVY_STAT (++stat_alloc_range_iterations);

		if (desired_size <= frag_size) {
			void *p;
			*out_alloc_size = desired_size;

			p = par_alloc_from_fragment (allocator, frag, desired_size);
			if (!p) {
				HEAVY_STAT (++stat_alloc_range_retries);
				goto restart;
			}
#ifdef NALLOC_DEBUG
			add_alloc_record (p, desired_size, RANGE_ALLOC);
#endif
			return p;
		}
		/* Track the largest acceptable fragment as a second-chance fallback. */
		if (current_minimum <= frag_size) {
			min_frag = frag;
			current_minimum = frag_size;
		}
	}

	/* The second fragment_next read should be ordered in respect to the first code block */
	mono_memory_barrier ();

	if (min_frag) {
		void *p;
		size_t frag_size;

		/* Re-read the size: it may have shrunk concurrently since the scan. */
		frag_size = min_frag->fragment_end - min_frag->fragment_next;
		if (frag_size < minimum_size)
			goto restart;

		*out_alloc_size = frag_size;

		mono_memory_barrier ();
		p = par_alloc_from_fragment (allocator, min_frag, frag_size);

		/*XXX restarting here is quite dubious given this is already second chance allocation. */
		if (!p) {
			HEAVY_STAT (++stat_alloc_retries);
			goto restart;
		}
#ifdef NALLOC_DEBUG
		add_alloc_record (p, frag_size, RANGE_ALLOC);
#endif
		return p;
	}

	return NULL;
}