void
sgen_gray_object_enqueue_section (SgenGrayQueue *queue, GrayQueueSection *section, gboolean is_parallel)
{
	STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_FLOATING, GRAY_QUEUE_SECTION_STATE_ENQUEUED);

	if (queue->first)
		queue->first->size = queue->cursor - queue->first->entries + 1;

	section->next = queue->first;
	section->prev = NULL;
	if (queue->first)
		queue->first->prev = section;
	else
		queue->last = section;
	queue->first = section;
	queue->cursor = queue->first->entries + queue->first->size - 1;
#ifdef SGEN_CHECK_GRAY_OBJECT_ENQUEUE
	if (queue->enqueue_check_func) {
		int i;
		for (i = 0; i < section->size; ++i)
			queue->enqueue_check_func (section->entries [i].obj);
	}
#endif
	if (is_parallel) {
		mono_memory_write_barrier ();
		mono_atomic_inc_i32 (&queue->num_sections);
	} else {
		queue->num_sections++;
	}
}
MONO_SIG_HANDLER_FUNC (static, profiler_signal_handler)
{
	int old_errno = errno;

	MONO_SIG_HANDLER_GET_CONTEXT;

	/* See the comment in mono_runtime_shutdown_stat_profiler (). */
	if (mono_native_thread_id_get () == sampling_thread) {
		mono_atomic_inc_i32 (&profiler_interrupt_signals_received);
		return;
	}

	mono_atomic_inc_i32 (&profiler_signals_received);

	// Did a non-attached or detaching thread get the signal?
	if (mono_thread_info_get_small_id () == -1 ||
	    !mono_domain_get () ||
	    !mono_tls_get_jit_tls ()) {
		errno = old_errno;
		return;
	}

	// See the comment in sampling_thread_func ().
	mono_atomic_store_i32 (&mono_thread_info_current ()->profiler_signal_ack, 1);

	mono_atomic_inc_i32 (&profiler_signals_accepted);

	int hp_save_index = mono_hazard_pointer_save_for_signal_handler ();

	mono_thread_info_set_is_async_context (TRUE);

	MONO_PROFILER_RAISE (sample_hit, (mono_arch_ip_from_context (ctx), ctx));

	mono_thread_info_set_is_async_context (FALSE);

	mono_hazard_pointer_restore_for_signal_handler (hp_save_index);

	errno = old_errno;

	mono_chain_signal (MONO_SIG_HANDLER_PARAMS);
}
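/*
 * Standalone sketch (assumed names, C11 atomics rather than the runtime's
 * types) of the per-thread acknowledgement handshake between the sampling
 * thread and the handler above: the sampler only sends a new signal after
 * flipping the ack flag from "acknowledged" (1) to "pending" (0), and the
 * handler stores 1 to acknowledge. This bounds the number of undelivered
 * signals per thread to one, so the signal queue cannot overflow.
 */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_int example_signal_ack = 1;	/* 1 = last signal acknowledged */

/* Sampler side: returns true when it is safe to send another signal. */
static bool
example_try_mark_signal_pending (void)
{
	int expected = 1;
	return atomic_compare_exchange_strong (&example_signal_ack, &expected, 0);
}

/* Handler side: acknowledge receipt so the sampler may signal again. */
static void
example_acknowledge_signal (void)
{
	atomic_store (&example_signal_ack, 1);
}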
/**
 * mono_thread_hazardous_queue_free:
 * \param p the pointer to free
 * \param free_func the function that can free the pointer
 * Queue \p p to be freed later. \p p will be freed once the hazard free queue is pumped.
 *
 * This function doesn't pump the free queue so try to accommodate a call at an appropriate time.
 * See \c mono_thread_hazardous_try_free_some for when it's appropriate.
 */
void
mono_thread_hazardous_queue_free (gpointer p, MonoHazardousFreeFunc free_func)
{
	DelayedFreeItem item = { p, free_func };

	mono_atomic_inc_i32 (&hazardous_pointer_count);

	mono_lock_free_array_queue_push (&delayed_free_queue, &item);

	guint32 queue_size = delayed_free_queue.num_used_entries;

	if (queue_size && queue_size_cb)
		queue_size_cb (queue_size);
}
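/*
 * Minimal usage sketch (not part of the runtime): a hypothetical caller that
 * retires a node which other threads may still be reading through a hazard
 * pointer. Instead of freeing in place, it hands the node to the delayed-free
 * queue with a matching free function; the queue is pumped elsewhere (see
 * mono_thread_hazardous_try_free_some, referenced in the docs above).
 */
static void
example_retire_node (gpointer node)
{
	/* `node` was unlinked from a lock-free structure but may still be
	 * protected by another thread's hazard pointer, so defer the free. */
	mono_thread_hazardous_queue_free (node, g_free);
}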
GrayQueueSection*
sgen_gray_object_steal_section (SgenGrayQueue *queue)
{
	gint32 sections_remaining;
	GrayQueueSection *section = NULL;

	/*
	 * With each push/pop into the queue we increment the number of sections.
	 * There is only one thread accessing the top (the owner) and potentially
	 * multiple workers trying to steal sections from the bottom, so we need
	 * to lock. A num_sections decrement by the owner means that the first
	 * section is reserved, while a decrement by a stealer means that the
	 * last section is reserved. If, after we decrement num_sections, at least
	 * one more section is still present, we cannot race with the other thread.
	 * Otherwise, the stealing end abandons the pop and restores num_sections,
	 * while the owner end takes the lock to make sure it is not racing with a
	 * stealer (since the stealer might have popped an entry and be in the
	 * process of updating the entry that the owner is trying to pop).
	 */
	if (queue->num_sections <= 1)
		return NULL;

	/* Give up if there is contention on the last section */
	if (mono_os_mutex_trylock (&queue->steal_mutex) != 0)
		return NULL;

	sections_remaining = mono_atomic_dec_i32 (&queue->num_sections);
	if (sections_remaining <= 0) {
		/* The section that we tried to steal might be the head of the queue. */
		mono_atomic_inc_i32 (&queue->num_sections);
	} else {
		/* We have reserved for us the tail section of the queue */
		section = queue->last;
		SGEN_ASSERT (0, section, "Why we don't have any sections to steal?");
		SGEN_ASSERT (0, !section->next, "Why aren't we stealing the tail?");
		queue->last = section->prev;
		section->prev = NULL;
		SGEN_ASSERT (0, queue->last, "Why are we stealing the last section?");
		queue->last->next = NULL;

		STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_ENQUEUED, GRAY_QUEUE_SECTION_STATE_FLOATING);
	}

	mono_os_mutex_unlock (&queue->steal_mutex);
	return section;
}
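/*
 * Simplified illustration (assumed names, C11 atomics; not SGen code) of the
 * reservation scheme described in the comment above. Both ends first decrement
 * the shared section count; if at least one section remains afterwards, the two
 * ends cannot be touching the same section and may proceed. Otherwise the
 * stealer backs off, restoring the count, and the owner falls back to the mutex.
 */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_int example_num_sections;

/* Stealer side: reserve the tail section, or give up if it might be contended. */
static bool
example_try_reserve_tail (void)
{
	/* atomic_fetch_sub returns the previous value; compute the new count. */
	int remaining = atomic_fetch_sub (&example_num_sections, 1) - 1;
	if (remaining <= 0) {
		/* Possible race with the owner on the last section: undo and bail. */
		atomic_fetch_add (&example_num_sections, 1);
		return false;
	}
	return true;	/* the tail section is now reserved for this stealer */
}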
void
sgen_gray_object_alloc_queue_section (SgenGrayQueue *queue, gboolean is_parallel)
{
	GrayQueueSection *section;

	if (queue->free_list) {
		/* Use the previously allocated queue sections if possible */
		section = queue->free_list;
		queue->free_list = section->next;
		STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_FREE_LIST, GRAY_QUEUE_SECTION_STATE_FLOATING);
	} else {
		HEAVY_STAT (stat_gray_queue_section_alloc ++);

		/* Allocate a new section */
		section = (GrayQueueSection *)sgen_alloc_internal (INTERNAL_MEM_GRAY_QUEUE);
		STATE_SET (section, GRAY_QUEUE_SECTION_STATE_FLOATING);
	}

	/* Section is empty */
	section->size = 0;

	STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_FLOATING, GRAY_QUEUE_SECTION_STATE_ENQUEUED);

	/* Link it with the others */
	section->next = queue->first;
	section->prev = NULL;
	if (queue->first)
		queue->first->prev = section;
	else
		queue->last = section;
	queue->first = section;
	queue->cursor = section->entries - 1;

	if (is_parallel) {
		mono_memory_write_barrier ();
		/*
		 * FIXME
		 * we could probably optimize the code to only rely on the write barrier
		 * for synchronization with the stealer thread. Additionally we could also
		 * do a write barrier once every other gray queue change, and request
		 * to have a minimum of sections before stealing, to keep consistency.
		 */
		mono_atomic_inc_i32 (&queue->num_sections);
	} else {
		queue->num_sections++;
	}
}
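/*
 * Standalone sketch (assumed names, C11 atomics instead of
 * mono_memory_write_barrier) of the publish pattern used above for the
 * parallel case: every store that sets up and links the new section must be
 * ordered before the counter increment, so a stealer that observes the
 * incremented count also observes a fully linked section.
 */
#include <stdatomic.h>

typedef struct ExampleSection {
	struct ExampleSection *next, *prev;
	int size;
} ExampleSection;

typedef struct {
	ExampleSection *first, *last;
	atomic_int num_sections;
} ExampleQueue;

static void
example_publish_section (ExampleQueue *q, ExampleSection *s)
{
	s->size = 0;
	s->next = q->first;
	s->prev = NULL;
	if (q->first)
		q->first->prev = s;
	else
		q->last = s;
	q->first = s;
	/* Release ordering plays the role of the write barrier above. */
	atomic_fetch_add_explicit (&q->num_sections, 1, memory_order_release);
}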
GCObject*
sgen_alloc_obj (GCVTable vtable, size_t size)
{
	GCObject *res;
	TLAB_ACCESS_INIT;

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;

	if (G_UNLIKELY (sgen_has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = mono_atomic_inc_i32 (&alloc_count);

		if (sgen_verify_before_allocs) {
			if ((current_alloc % sgen_verify_before_allocs) == 0) {
				LOCK_GC;
				sgen_check_whole_heap_stw ();
				UNLOCK_GC;
			}
		}
		if (sgen_collect_before_allocs) {
			if (((current_alloc % sgen_collect_before_allocs) == 0) && sgen_nursery_section) {
				LOCK_GC;
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered", TRUE, TRUE);
				UNLOCK_GC;
			}
		}
	}

	ENTER_CRITICAL_REGION;
	res = sgen_try_alloc_obj_nolock (vtable, size);
	if (res) {
		EXIT_CRITICAL_REGION;
		return res;
	}
	EXIT_CRITICAL_REGION;

	LOCK_GC;
	res = sgen_alloc_obj_nolock (vtable, size);
	UNLOCK_GC;
	return res;
}
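/*
 * Sketch (assumed names, standalone) of the per-allocation action counter used
 * above: a single shared counter is bumped atomically on every allocation, and
 * every Nth value triggers a debug action. The atomic increment keeps the
 * "every Nth allocation" trigger reliable across threads, unlike a plain ++.
 */
#include <stdatomic.h>

static atomic_int example_alloc_count;

static void
example_maybe_run_debug_action (int verify_every_n)
{
	/* Mirror mono_atomic_inc_i32, which returns the incremented value. */
	int current = atomic_fetch_add (&example_alloc_count, 1) + 1;
	if (verify_every_n && (current % verify_every_n) == 0) {
		/* e.g. verify the whole heap while the world is stopped */
	}
}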
gboolean
sgen_cement_lookup_or_register (GCObject *obj)
{
	guint hv;
	int i;
	CementHashEntry *hash = cement_hash;

	if (!cement_enabled)
		return FALSE;

	hv = sgen_aligned_addr_hash (obj);
	i = SGEN_CEMENT_HASH (hv);

	SGEN_ASSERT (5, sgen_ptr_in_nursery (obj), "Can only cement pointers to nursery objects");

	if (!hash [i].obj) {
		GCObject *old_obj;
		old_obj = (GCObject*)mono_atomic_cas_ptr ((gpointer*)&hash [i].obj, obj, NULL);
		/* Check if the slot was occupied by some other object */
		if (old_obj != NULL && old_obj != obj)
			return FALSE;
	} else if (hash [i].obj != obj) {
		return FALSE;
	}

	if (hash [i].count >= SGEN_CEMENT_THRESHOLD)
		return TRUE;

	if (mono_atomic_inc_i32 ((gint32*)&hash [i].count) == SGEN_CEMENT_THRESHOLD) {
		SGEN_ASSERT (9, sgen_get_current_collection_generation () >= 0, "We can only cement objects when we're in a collection pause.");
		SGEN_ASSERT (9, SGEN_OBJECT_IS_PINNED (obj), "Can only cement pinned objects");
		SGEN_CEMENT_OBJECT (obj);

		sgen_binary_protocol_cement (obj, (gpointer)SGEN_LOAD_VTABLE (obj), (int)sgen_safe_object_get_size (obj));
	}

	return FALSE;
}
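/*
 * Standalone sketch (illustrative only, assumed names) of the two atomic
 * idioms above: claiming an empty hash slot with compare-and-swap so that
 * concurrent workers agree on a single owner object, and using the return
 * value of the atomic increment so that the "threshold reached" action runs
 * exactly once, on whichever thread performs the increment that hits it.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

#define EXAMPLE_THRESHOLD 1000

typedef struct {
	_Atomic (void *) obj;
	atomic_int count;
} ExampleSlot;

static bool
example_record_hit (ExampleSlot *slot, void *obj)
{
	void *expected = NULL;
	/* Claim the slot if empty; fail if a different object already owns it. */
	if (!atomic_compare_exchange_strong (&slot->obj, &expected, obj) && expected != obj)
		return false;

	/* Exactly one thread observes the increment that reaches the threshold. */
	if (atomic_fetch_add (&slot->count, 1) + 1 == EXAMPLE_THRESHOLD) {
		/* perform the once-only action (cementing, in the code above) */
	}
	return true;
}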
/*
 * Provide a variant that takes just the vtable for small fixed-size objects.
 * The aligned size is already computed and stored in vt->gc_descr.
 * Note: every SGEN_SCAN_START_SIZE or so we are given the chance to do some special
 * processing. We can keep track of where objects start, for example,
 * so when we scan the thread stacks for pinned objects, we can start
 * a search for the pinned object in SGEN_SCAN_START_SIZE chunks.
 */
GCObject*
sgen_alloc_obj_nolock (GCVTable vtable, size_t size)
{
	/* FIXME: handle OOM */
	void **p;
	char *new_next;
	size_t real_size = size;
	TLAB_ACCESS_INIT;

	CANARIFY_SIZE(size);

	HEAVY_STAT (++stat_objects_alloced);
	if (real_size <= SGEN_MAX_SMALL_OBJ_SIZE)
		HEAVY_STAT (stat_bytes_alloced += size);
	else
		HEAVY_STAT (stat_bytes_alloced_los += size);

	size = ALIGN_UP (size);

	SGEN_ASSERT (6, sgen_vtable_get_descriptor (vtable), "VTable without descriptor");

	if (G_UNLIKELY (sgen_has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = mono_atomic_inc_i32 (&alloc_count);

		if (sgen_collect_before_allocs) {
			if (((current_alloc % sgen_collect_before_allocs) == 0) && sgen_nursery_section) {
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered", TRUE, TRUE);
				if (!sgen_degraded_mode && sgen_can_alloc_size (size) && real_size <= SGEN_MAX_SMALL_OBJ_SIZE) {
					// FIXME:
					g_assert_not_reached ();
				}
			}
		} else if (sgen_verify_before_allocs) {
			if ((current_alloc % sgen_verify_before_allocs) == 0)
				sgen_check_whole_heap_stw ();
		}
	}

	/*
	 * We must already have the lock here instead of after the
	 * fast path because we might be interrupted in the fast path
	 * (after confirming that new_next < TLAB_TEMP_END) by the GC,
	 * and we'll end up allocating an object in a fragment which
	 * no longer belongs to us.
	 *
	 * The managed allocator does not do this, but it's treated
	 * specially by the world-stopping code.
	 */
	if (real_size > SGEN_MAX_SMALL_OBJ_SIZE) {
		p = (void **)sgen_los_alloc_large_inner (vtable, ALIGN_UP (real_size));
	} else {
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;
		TLAB_NEXT = new_next;

		if (G_LIKELY (new_next < TLAB_TEMP_END)) {
			/* Fast path */
			CANARIFY_ALLOC(p,real_size);
			SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
			sgen_binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
			g_assert (*p == NULL);
			mono_atomic_store_seq (p, vtable);

			return (GCObject*)p;
		}

		/* Slow path */

		/* there are two cases: the object is too big or we run out of space in the TLAB */
		/* we also reach here when the thread does its first allocation after a minor
		 * collection, since the tlab_ variables are initialized to NULL.
		 * there can be another case (from ORP), if we cooperate with the runtime a bit:
		 * objects that need finalizers can have the high bit set in their size
		 * so the above check fails and we can readily add the object to the queue.
		 * This avoids taking the GC lock again when registering, but this is moot when
		 * doing thread-local allocation, so it may not be a good idea.
		 */
		if (TLAB_NEXT >= TLAB_REAL_END) {
			int available_in_tlab;
			/*
			 * Run out of space in the TLAB. When this happens, some amount of space
			 * remains in the TLAB, but not enough to satisfy the current allocation
			 * request. Currently, we retire the TLAB in all cases, later we could
			 * keep it if the remaining space is above a threshold, and satisfy the
			 * allocation directly from the nursery.
			 */
			TLAB_NEXT -= size;
			/* when running in degraded mode, we continue allocating that way
			 * for a while, to decrease the number of useless nursery collections.
			 */
			if (sgen_degraded_mode && sgen_degraded_mode < sgen_nursery_size)
				return alloc_degraded (vtable, size, FALSE);

			available_in_tlab = (int)(TLAB_REAL_END - TLAB_NEXT); /* We'll never have TLABs > 2 GB */
			if (size > sgen_tlab_size || available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
				/* Allocate directly from the nursery */
				p = (void **)sgen_nursery_alloc (size);
				if (!p) {
					/*
					 * We couldn't allocate from the nursery, so we try
					 * collecting. Even after the collection, we might
					 * still not have enough memory to allocate the
					 * object. The reason will most likely be that we've
					 * run out of memory, but there is the theoretical
					 * possibility that other threads might have consumed
					 * the freed up memory ahead of us.
					 *
					 * What we do in this case is allocate degraded, i.e.,
					 * from the major heap.
					 *
					 * Ideally we'd like to detect the case of other
					 * threads allocating ahead of us and loop (if we
					 * always loop we will loop endlessly in the case of
					 * OOM).
					 */
					sgen_ensure_free_space (real_size, GENERATION_NURSERY);
					if (!sgen_degraded_mode)
						p = (void **)sgen_nursery_alloc (size);
				}
				if (!p)
					return alloc_degraded (vtable, size, TRUE);

				zero_tlab_if_necessary (p, size);
			} else {
				size_t alloc_size = 0;
				if (TLAB_START)
					SGEN_LOG (3, "Retire TLAB: %p-%p [%ld]", TLAB_START, TLAB_REAL_END, (long)(TLAB_REAL_END - TLAB_NEXT - size));
				sgen_nursery_retire_region (p, available_in_tlab);

				p = (void **)sgen_nursery_alloc_range (sgen_tlab_size, size, &alloc_size);
				if (!p) {
					/* See comment above in similar case. */
					sgen_ensure_free_space (sgen_tlab_size, GENERATION_NURSERY);
					if (!sgen_degraded_mode)
						p = (void **)sgen_nursery_alloc_range (sgen_tlab_size, size, &alloc_size);
				}
				if (!p)
					return alloc_degraded (vtable, size, TRUE);

				/* Allocate a new TLAB from the current nursery fragment */
				TLAB_START = (char*)p;
				TLAB_NEXT = TLAB_START;
				TLAB_REAL_END = TLAB_START + alloc_size;
				TLAB_TEMP_END = TLAB_START + MIN (SGEN_SCAN_START_SIZE, alloc_size);

				zero_tlab_if_necessary (TLAB_START, alloc_size);

				/* Allocate from the TLAB */
				p = (void **)TLAB_NEXT;
				TLAB_NEXT += size;
				sgen_set_nursery_scan_start ((char*)p);
			}
		} else {
			/* Reached tlab_temp_end */

			/* record the scan start so we can find pinned objects more easily */
			sgen_set_nursery_scan_start ((char*)p);
			/* we just bump tlab_temp_end as well */
			TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
			SGEN_LOG (5, "Expanding local alloc: %p-%p", TLAB_NEXT, TLAB_TEMP_END);
		}
		CANARIFY_ALLOC(p,real_size);
	}

	if (G_LIKELY (p)) {
		SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
		sgen_binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
		mono_atomic_store_seq (p, vtable);
	}

	return (GCObject*)p;
}
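/*
 * Minimal standalone sketch (assumed names, not SGen's TLAB_* macros) of the
 * bump-pointer fast path implemented above: each thread owns a [next, end)
 * window of the nursery, so the common case is a thread-local pointer bump
 * with no atomics or locks, and the slow path retires the window and refills
 * it from shared space.
 */
#include <stddef.h>

typedef struct {
	char *next;	/* next free byte in this thread's TLAB */
	char *end;	/* end of the TLAB */
} ExampleTlab;

static void *
example_refill_and_alloc (ExampleTlab *tlab, size_t size)
{
	/* Slow-path placeholder: a real allocator would retire the TLAB and
	 * request a new range from the shared nursery here. */
	(void)tlab;
	(void)size;
	return NULL;
}

static void *
example_tlab_alloc (ExampleTlab *tlab, size_t size)
{
	size = (size + 7) & ~(size_t)7;		/* align to 8 bytes */
	char *p = tlab->next;
	if ((size_t)(tlab->end - p) >= size) {	/* fast path: fits in the TLAB */
		tlab->next = p + size;
		return p;
	}
	return example_refill_and_alloc (tlab, size);	/* retire and refill */
}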
static gsize
sampling_thread_func (gpointer unused)
{
	MonoInternalThread *thread = mono_thread_internal_current ();

	thread->flags |= MONO_THREAD_FLAG_DONT_MANAGE;

	ERROR_DECL (error);

	MonoString *name = mono_string_new_checked (mono_get_root_domain (), "Profiler Sampler", error);
	mono_error_assert_ok (error);
	mono_thread_set_name_internal (thread, name, FALSE, FALSE, error);
	mono_error_assert_ok (error);

	mono_thread_info_set_flags (MONO_THREAD_INFO_FLAGS_NO_GC | MONO_THREAD_INFO_FLAGS_NO_SAMPLE);

	int old_policy;
	struct sched_param old_sched;
	pthread_getschedparam (pthread_self (), &old_policy, &old_sched);

	/*
	 * Attempt to switch the thread to real time scheduling. This will not
	 * necessarily work on all OSs; for example, most Linux systems will give
	 * us EPERM here unless configured to allow this.
	 *
	 * TODO: This does not work on Mac (and maybe some other OSs). On Mac, we
	 * have to use the Mach thread policy routines to switch to real-time
	 * scheduling. This is quite tricky as we need to specify how often we'll
	 * be doing work (easy), the normal processing time needed (also easy),
	 * and the maximum amount of processing time needed (hard). This is
	 * further complicated by the fact that if we misbehave and take too long
	 * to do our work, the kernel may knock us back down to the normal thread
	 * scheduling policy without telling us.
	 */
	struct sched_param sched = { .sched_priority = sched_get_priority_max (SCHED_FIFO) };
	pthread_setschedparam (pthread_self (), SCHED_FIFO, &sched);

	MonoProfilerSampleMode mode;

init:
	mono_profiler_get_sample_mode (NULL, &mode, NULL);

	if (mode == MONO_PROFILER_SAMPLE_MODE_NONE) {
		mono_profiler_sampling_thread_wait ();

		if (!mono_atomic_load_i32 (&sampling_thread_running))
			goto done;

		goto init;
	}

	clock_init (mode);

	for (guint64 sleep = clock_get_time_ns (); mono_atomic_load_i32 (&sampling_thread_running); clock_sleep_ns_abs (sleep)) {
		uint32_t freq;
		MonoProfilerSampleMode new_mode;

		mono_profiler_get_sample_mode (NULL, &new_mode, &freq);

		if (new_mode != mode) {
			clock_cleanup ();
			goto init;
		}

		sleep += 1000000000 / freq;

		FOREACH_THREAD_SAFE_EXCLUDE (info, MONO_THREAD_INFO_FLAGS_NO_SAMPLE) {
			g_assert (mono_thread_info_get_tid (info) != sampling_thread);

			/*
			 * Require an ack for the last sampling signal sent to the thread
			 * so that we don't overflow the signal queue, leading to all sorts
			 * of problems (e.g. GC STW failing).
			 */
			if (profiler_signal != SIGPROF && !mono_atomic_cas_i32 (&info->profiler_signal_ack, 0, 1))
				continue;

			mono_threads_pthread_kill (info, profiler_signal);
			mono_atomic_inc_i32 (&profiler_signals_sent);
		} FOREACH_THREAD_SAFE_END
	}