Example #1
File: sgen-gray.c Project: medo64/mono
void
sgen_gray_object_enqueue_section (SgenGrayQueue *queue, GrayQueueSection *section, gboolean is_parallel)
{
	STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_FLOATING, GRAY_QUEUE_SECTION_STATE_ENQUEUED);

	if (queue->first)
		queue->first->size = queue->cursor - queue->first->entries + 1;

	section->next = queue->first;
	section->prev = NULL;
	if (queue->first)
		queue->first->prev = section;
	else
		queue->last = section;
	queue->first = section;
	queue->cursor = queue->first->entries + queue->first->size - 1;
#ifdef SGEN_CHECK_GRAY_OBJECT_ENQUEUE
	if (queue->enqueue_check_func) {
		int i;
		for (i = 0; i < section->size; ++i)
			queue->enqueue_check_func (section->entries [i].obj);
	}
#endif
	if (is_parallel) {
		mono_memory_write_barrier ();
		mono_atomic_inc_i32 (&queue->num_sections);
	} else {
		queue->num_sections++;
	}
}
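The write barrier issued before the atomic increment is what publishes the newly linked section to a stealing thread before the section count admits it exists. A minimal standalone sketch of the same publish ordering, using C11 atomics rather than Mono's mono_memory_write_barrier/mono_atomic_inc_i32 (all names below are illustrative, not Mono API):

#include <stdatomic.h>
#include <stddef.h>

typedef struct node {
	struct node *next;
	int payload;
} node;

typedef struct {
	node *head;           /* only the owner thread mutates the links */
	atomic_int num_nodes; /* stealers read this to learn how much is published */
} work_list;

/* Owner-side push: fully link the node first, then publish it by bumping the
 * counter with release semantics, mirroring the barrier-then-increment order
 * in sgen_gray_object_enqueue_section. */
static void
push_published (work_list *list, node *n)
{
	n->next = list->head;
	list->head = n;
	/* The release increment orders the link writes before the counter update,
	 * so a stealer that observes the new count also observes the links. */
	atomic_fetch_add_explicit (&list->num_nodes, 1, memory_order_release);
}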
Example #2
MONO_SIG_HANDLER_FUNC (static, profiler_signal_handler)
{
	int old_errno = errno;

	MONO_SIG_HANDLER_GET_CONTEXT;

	/* See the comment in mono_runtime_shutdown_stat_profiler (). */
	if (mono_native_thread_id_get () == sampling_thread) {
		mono_atomic_inc_i32 (&profiler_interrupt_signals_received);
		return;
	}

	mono_atomic_inc_i32 (&profiler_signals_received);

	// Did a non-attached or detaching thread get the signal?
	if (mono_thread_info_get_small_id () == -1 ||
	    !mono_domain_get () ||
	    !mono_tls_get_jit_tls ()) {
		errno = old_errno;
		return;
	}

	// See the comment in sampling_thread_func ().
	mono_atomic_store_i32 (&mono_thread_info_current ()->profiler_signal_ack, 1);

	mono_atomic_inc_i32 (&profiler_signals_accepted);

	int hp_save_index = mono_hazard_pointer_save_for_signal_handler ();

	mono_thread_info_set_is_async_context (TRUE);

	MONO_PROFILER_RAISE (sample_hit, (mono_arch_ip_from_context (ctx), ctx));

	mono_thread_info_set_is_async_context (FALSE);

	mono_hazard_pointer_restore_for_signal_handler (hp_save_index);

	errno = old_errno;

	mono_chain_signal (MONO_SIG_HANDLER_PARAMS);
}
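The handler above follows the usual async-signal-safety discipline: save errno on entry, restore it on exit, and only touch lock-free atomics before deciding whether to do real work. A standalone, POSIX-only sketch of that discipline (illustrative code, not part of Mono):

#include <errno.h>
#include <signal.h>
#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

static atomic_int signals_received;

/* Keep the handler async-signal-safe: only touch a lock-free atomic and
 * save/restore errno, the same discipline profiler_signal_handler follows. */
static void
on_prof_signal (int sig)
{
	int old_errno = errno;
	(void) sig;
	atomic_fetch_add_explicit (&signals_received, 1, memory_order_relaxed);
	errno = old_errno;
}

int
main (void)
{
	struct sigaction sa;
	memset (&sa, 0, sizeof sa);
	sa.sa_handler = on_prof_signal;
	sigemptyset (&sa.sa_mask);
	sigaction (SIGPROF, &sa, NULL);

	raise (SIGPROF);
	printf ("signals received: %d\n", atomic_load (&signals_received));
	return 0;
}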
Example #3
/**
 * mono_thread_hazardous_queue_free:
 * \param p the pointer to free
 * \param free_func the function that can free the pointer
 * Queue \p p to be freed later. \p p will be freed once the hazard free queue is pumped.
 *
 * This function doesn't pump the free queue itself, so arrange to pump it at an appropriate time.
 * See \c mono_thread_hazardous_try_free_some for when it's appropriate.
 */
void
mono_thread_hazardous_queue_free (gpointer p, MonoHazardousFreeFunc free_func)
{
	DelayedFreeItem item = { p, free_func };

	mono_atomic_inc_i32 (&hazardous_pointer_count);

	mono_lock_free_array_queue_push (&delayed_free_queue, &item);

	guint32 queue_size = delayed_free_queue.num_used_entries;
	if (queue_size && queue_size_cb)
		queue_size_cb (queue_size);
}
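A hedged usage sketch for the function documented above. The header path and the use of g_free as the MonoHazardousFreeFunc are assumptions made for illustration; only mono_thread_hazardous_queue_free and mono_thread_hazardous_try_free_some come from the example itself.

#include <glib.h>
#include <mono/utils/hazard-pointer.h>	/* assumed header location */

/* Retire a node that other threads may still reference through hazard
 * pointers; the actual free is deferred until the queue is pumped. */
static void
retire_node (gpointer node)
{
	mono_thread_hazardous_queue_free (node, g_free);
}

/* At a point where a little extra work is acceptable, pump the queue so
 * the deferred frees actually happen. */
static void
quiescent_point (void)
{
	mono_thread_hazardous_try_free_some ();
}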
Example #4
File: sgen-gray.c Project: medo64/mono
GrayQueueSection*
sgen_gray_object_steal_section (SgenGrayQueue *queue)
{
	gint32 sections_remaining;
	GrayQueueSection *section = NULL;

	/*
	 * Each push into the queue increments the number of sections and each
	 * pop decrements it. There is only one thread accessing the top (the
	 * owner) and potentially multiple workers trying to steal sections from
	 * the bottom, so we need to lock. A num_sections decrement from the
	 * owner means that the first section is reserved, while a decrement by
	 * the stealer means that the last section is reserved. If, after we
	 * decrement num_sections, at least one more section is still present,
	 * we cannot be racing with the other thread. Otherwise the stealing end
	 * abandons the pop and restores num_sections, while the owner end takes
	 * a lock to make sure it is not racing with the stealer (since the
	 * stealer might have popped an entry and be in the process of updating
	 * the entry that the owner is trying to pop).
	 */

	if (queue->num_sections <= 1)
		return NULL;

	/* Give up if there is contention on the last section */
	if (mono_os_mutex_trylock (&queue->steal_mutex) != 0)
		return NULL;

	sections_remaining = mono_atomic_dec_i32 (&queue->num_sections);
	if (sections_remaining <= 0) {
		/* The section that we tried to steal might be the head of the queue. */
		mono_atomic_inc_i32 (&queue->num_sections);
	} else {
		/* We have reserved for us the tail section of the queue */
		section = queue->last;
		SGEN_ASSERT (0, section, "Why don't we have any sections to steal?");
		SGEN_ASSERT (0, !section->next, "Why aren't we stealing the tail?");
		queue->last = section->prev;
		section->prev = NULL;
		SGEN_ASSERT (0, queue->last, "Why are we stealing the last section?");
		queue->last->next = NULL;

		STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_ENQUEUED, GRAY_QUEUE_SECTION_STATE_FLOATING);
	}

	mono_os_mutex_unlock (&queue->steal_mutex);
	return section;
}
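The core of the stealing protocol is the decrement-then-check reservation: drop the shared count first, and only keep the reservation if at least one section remains afterwards. A generic standalone form of that step with C11 atomics (illustrative, not Mono code; note that mono_atomic_dec_i32 returns the new value, whereas atomic_fetch_sub returns the old one):

#include <stdatomic.h>
#include <stdbool.h>

/* Try to reserve one item out of a count shared with the owner thread.
 * Mirrors the sections_remaining logic in sgen_gray_object_steal_section. */
static bool
try_reserve_one (atomic_int *count)
{
	int remaining = atomic_fetch_sub_explicit (count, 1, memory_order_acq_rel) - 1;
	if (remaining <= 0) {
		/* We may be racing with the owner over the last item: undo and give up. */
		atomic_fetch_add_explicit (count, 1, memory_order_relaxed);
		return false;
	}
	return true;	/* one item at our end is now reserved for us */
}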
Example #5
File: sgen-gray.c Project: medo64/mono
void
sgen_gray_object_alloc_queue_section (SgenGrayQueue *queue, gboolean is_parallel)
{
	GrayQueueSection *section;

	if (queue->free_list) {
		/* Use the previously allocated queue sections if possible */
		section = queue->free_list;
		queue->free_list = section->next;
		STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_FREE_LIST, GRAY_QUEUE_SECTION_STATE_FLOATING);
	} else {
		HEAVY_STAT (stat_gray_queue_section_alloc ++);

		/* Allocate a new section */
		section = (GrayQueueSection *)sgen_alloc_internal (INTERNAL_MEM_GRAY_QUEUE);
		STATE_SET (section, GRAY_QUEUE_SECTION_STATE_FLOATING);
	}

	/* Section is empty */
	section->size = 0;

	STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_FLOATING, GRAY_QUEUE_SECTION_STATE_ENQUEUED);

	/* Link it with the others */
	section->next = queue->first;
	section->prev = NULL;
	if (queue->first)
		queue->first->prev = section;
	else
		queue->last = section;
	queue->first = section;
	queue->cursor = section->entries - 1;

	if (is_parallel) {
		mono_memory_write_barrier ();
		/*
		 * FIXME
		 * we could probably optimize the code to only rely on the write barrier
		 * for synchronization with the stealer thread. Additionally we could also
		 * do a write barrier once every other gray queue change, and require a
		 * minimum number of sections before stealing, to keep consistency.
		 */
		mono_atomic_inc_i32 (&queue->num_sections);
	} else {
		queue->num_sections++;
	}
}
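The allocation half of the function is a plain pop-from-free-list-or-allocate step. A small standalone rendering of that step with generic names (block, block_get, and the calloc fallback are illustrative; the real code uses sgen_alloc_internal with INTERNAL_MEM_GRAY_QUEUE):

#include <stdlib.h>

typedef struct block {
	struct block *next;
	int size;
} block;

/* Reuse a retired block from the free list when possible; otherwise
 * allocate a fresh one. Either way the block starts out empty. */
static block *
block_get (block **free_list)
{
	block *b = *free_list;
	if (b)
		*free_list = b->next;		/* cheap reuse */
	else
		b = calloc (1, sizeof *b);	/* slow path: fresh allocation */
	if (b)
		b->size = 0;
	return b;
}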
Example #6
GCObject*
sgen_alloc_obj (GCVTable vtable, size_t size)
{
	GCObject *res;
	TLAB_ACCESS_INIT;

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;

	if (G_UNLIKELY (sgen_has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = mono_atomic_inc_i32 (&alloc_count);

		if (sgen_verify_before_allocs) {
			if ((current_alloc % sgen_verify_before_allocs) == 0) {
				LOCK_GC;
				sgen_check_whole_heap_stw ();
				UNLOCK_GC;
			}
		}
		if (sgen_collect_before_allocs) {
			if (((current_alloc % sgen_collect_before_allocs) == 0) && sgen_nursery_section) {
				LOCK_GC;
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered", TRUE, TRUE);
				UNLOCK_GC;
			}
		}
	}

	ENTER_CRITICAL_REGION;
	res = sgen_try_alloc_obj_nolock (vtable, size);
	if (res) {
		EXIT_CRITICAL_REGION;
		return res;
	}
	EXIT_CRITICAL_REGION;

	LOCK_GC;
	res = sgen_alloc_obj_nolock (vtable, size);
	UNLOCK_GC;
	return res;
}
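sgen_alloc_obj is a two-tier allocator: it first tries the allocation inside a restartable critical region and only takes the global GC lock when that fails. A standalone sketch of the same shape, with a plain mutex standing in for LOCK_GC and placeholder allocators standing in for the sgen routines (all names here are illustrative):

#include <pthread.h>
#include <stddef.h>
#include <stdlib.h>

static pthread_mutex_t slow_lock = PTHREAD_MUTEX_INITIALIZER;

/* Placeholder fast path: pretend there is no room and force the slow path. */
static void *
try_alloc_fast (size_t size)
{
	(void) size;
	return NULL;
}

/* Placeholder slow path, called only while the lock is held. */
static void *
alloc_slow_locked (size_t size)
{
	return malloc (size);
}

static void *
alloc_two_tier (size_t size)
{
	void *p = try_alloc_fast (size);	/* common case: no global lock */
	if (p)
		return p;

	pthread_mutex_lock (&slow_lock);	/* rare case: serialize with the collector */
	p = alloc_slow_locked (size);
	pthread_mutex_unlock (&slow_lock);
	return p;
}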
Example #7
gboolean
sgen_cement_lookup_or_register (GCObject *obj)
{
	guint hv;
	int i;
	CementHashEntry *hash = cement_hash;

	if (!cement_enabled)
		return FALSE;

	hv = sgen_aligned_addr_hash (obj);
	i = SGEN_CEMENT_HASH (hv);

	SGEN_ASSERT (5, sgen_ptr_in_nursery (obj), "Can only cement pointers to nursery objects");

	if (!hash [i].obj) {
		GCObject *old_obj;
		old_obj = (GCObject*)mono_atomic_cas_ptr ((gpointer*)&hash [i].obj, obj, NULL);
		/* Check if the slot was occupied by some other object */
		if (old_obj != NULL && old_obj != obj)
			return FALSE;
	} else if (hash [i].obj != obj) {
		return FALSE;
	}

	if (hash [i].count >= SGEN_CEMENT_THRESHOLD)
		return TRUE;

	if (mono_atomic_inc_i32 ((gint32*)&hash [i].count) == SGEN_CEMENT_THRESHOLD) {
		SGEN_ASSERT (9, sgen_get_current_collection_generation () >= 0, "We can only cement objects when we're in a collection pause.");
		SGEN_ASSERT (9, SGEN_OBJECT_IS_PINNED (obj), "Can only cement pinned objects");
		SGEN_CEMENT_OBJECT (obj);

		sgen_binary_protocol_cement (obj, (gpointer)SGEN_LOAD_VTABLE (obj),
				(int)sgen_safe_object_get_size (obj));
	}

	return FALSE;
}
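The function combines two lock-free idioms: CAS a hash slot from NULL to claim it for an object, then count hits atomically so that exactly one caller observes the transition to the threshold. A simplified standalone sketch of those two steps with C11 atomics (slot, THRESHOLD and slot_register are illustrative names, and the return convention is slightly simplified relative to the Mono function):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

#define THRESHOLD 1000	/* stand-in for SGEN_CEMENT_THRESHOLD */

typedef struct {
	_Atomic (void *) obj;
	atomic_int count;
} slot;

static bool
slot_register (slot *s, void *obj)
{
	void *expected = NULL;
	/* Claim an empty slot; losing the race to the same object is fine. */
	if (!atomic_compare_exchange_strong (&s->obj, &expected, obj) && expected != obj)
		return false;		/* slot belongs to a different object */

	if (atomic_load (&s->count) >= THRESHOLD)
		return true;		/* already past the threshold */

	if (atomic_fetch_add (&s->count, 1) + 1 == THRESHOLD) {
		/* Exactly one caller sees this transition and can do the
		 * one-time work (cementing the object, in the Mono version). */
	}
	return false;
}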
Example #8
/*
 * Provide a variant that takes just the vtable for small fixed-size objects.
 * The aligned size is already computed and stored in vt->gc_descr.
 * Note: every SGEN_SCAN_START_SIZE or so we are given the chance to do some special
 * processing. We can keep track of where objects start, for example,
 * so when we scan the thread stacks for pinned objects, we can start
 * a search for the pinned object in SGEN_SCAN_START_SIZE chunks.
 */
GCObject*
sgen_alloc_obj_nolock (GCVTable vtable, size_t size)
{
	/* FIXME: handle OOM */
	void **p;
	char *new_next;
	size_t real_size = size;
	TLAB_ACCESS_INIT;
	
	CANARIFY_SIZE(size);

	HEAVY_STAT (++stat_objects_alloced);
	if (real_size <= SGEN_MAX_SMALL_OBJ_SIZE)
		HEAVY_STAT (stat_bytes_alloced += size);
	else
		HEAVY_STAT (stat_bytes_alloced_los += size);

	size = ALIGN_UP (size);

	SGEN_ASSERT (6, sgen_vtable_get_descriptor (vtable), "VTable without descriptor");

	if (G_UNLIKELY (sgen_has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = mono_atomic_inc_i32 (&alloc_count);

		if (sgen_collect_before_allocs) {
			if (((current_alloc % sgen_collect_before_allocs) == 0) && sgen_nursery_section) {
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered", TRUE, TRUE);
				if (!sgen_degraded_mode && sgen_can_alloc_size (size) && real_size <= SGEN_MAX_SMALL_OBJ_SIZE) {
					// FIXME:
					g_assert_not_reached ();
				}
			}
		} else if (sgen_verify_before_allocs) {
			if ((current_alloc % sgen_verify_before_allocs) == 0)
				sgen_check_whole_heap_stw ();
		}
	}

	/*
	 * We must already have the lock here instead of after the
	 * fast path because we might be interrupted in the fast path
	 * (after confirming that new_next < TLAB_TEMP_END) by the GC,
	 * and we'll end up allocating an object in a fragment which
	 * no longer belongs to us.
	 *
	 * The managed allocator does not do this, but it's treated
	 * specially by the world-stopping code.
	 */

	if (real_size > SGEN_MAX_SMALL_OBJ_SIZE) {
		p = (void **)sgen_los_alloc_large_inner (vtable, ALIGN_UP (real_size));
	} else {
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;
		TLAB_NEXT = new_next;

		if (G_LIKELY (new_next < TLAB_TEMP_END)) {
			/* Fast path */

			CANARIFY_ALLOC(p,real_size);
			SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
			sgen_binary_protocol_alloc (p , vtable, size, sgen_client_get_provenance ());
			g_assert (*p == NULL);
			mono_atomic_store_seq (p, vtable);

			return (GCObject*)p;
		}

		/* Slow path */

		/* there are two cases: the object is too big or we run out of space in the TLAB */
		/* we also reach here when the thread does its first allocation after a minor 
		 * collection, since the tlab_ variables are initialized to NULL.
		 * there can be another case (from ORP), if we cooperate with the runtime a bit:
		 * objects that need finalizers can have the high bit set in their size
		 * so the above check fails and we can readily add the object to the queue.
		 * This avoids taking the GC lock again when registering, but this is moot when
		 * doing thread-local allocation, so it may not be a good idea.
		 */
		if (TLAB_NEXT >= TLAB_REAL_END) {
			int available_in_tlab;
			/* 
			 * Run out of space in the TLAB. When this happens, some amount of space
			 * remains in the TLAB, but not enough to satisfy the current allocation
			 * request. Currently, we retire the TLAB in all cases, later we could
			 * keep it if the remaining space is above a threshold, and satisfy the
			 * allocation directly from the nursery.
			 */
			TLAB_NEXT -= size;
			/* when running in degraded mode, we continue allocating that way
			 * for a while, to decrease the number of useless nursery collections.
			 */
			if (sgen_degraded_mode && sgen_degraded_mode < sgen_nursery_size)
				return alloc_degraded (vtable, size, FALSE);

			available_in_tlab = (int)(TLAB_REAL_END - TLAB_NEXT); // we'll never have TLABs > 2 GB
			if (size > sgen_tlab_size || available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
				/* Allocate directly from the nursery */
				p = (void **)sgen_nursery_alloc (size);
				if (!p) {
					/*
					 * We couldn't allocate from the nursery, so we try
					 * collecting.  Even after the collection, we might
					 * still not have enough memory to allocate the
					 * object.  The reason will most likely be that we've
					 * run out of memory, but there is the theoretical
					 * possibility that other threads might have consumed
					 * the freed up memory ahead of us.
					 *
					 * What we do in this case is allocate degraded, i.e.,
					 * from the major heap.
					 *
					 * Ideally we'd like to detect the case of other
					 * threads allocating ahead of us and loop (if we
					 * always loop we will loop endlessly in the case of
					 * OOM).
					 */
					sgen_ensure_free_space (real_size, GENERATION_NURSERY);
					if (!sgen_degraded_mode)
						p = (void **)sgen_nursery_alloc (size);
				}
				if (!p)
					return alloc_degraded (vtable, size, TRUE);

				zero_tlab_if_necessary (p, size);
			} else {
				size_t alloc_size = 0;
				if (TLAB_START)
					SGEN_LOG (3, "Retire TLAB: %p-%p [%ld]", TLAB_START, TLAB_REAL_END, (long)(TLAB_REAL_END - TLAB_NEXT - size));
				sgen_nursery_retire_region (p, available_in_tlab);

				p = (void **)sgen_nursery_alloc_range (sgen_tlab_size, size, &alloc_size);
				if (!p) {
					/* See comment above in similar case. */
					sgen_ensure_free_space (sgen_tlab_size, GENERATION_NURSERY);
					if (!sgen_degraded_mode)
						p = (void **)sgen_nursery_alloc_range (sgen_tlab_size, size, &alloc_size);
				}
				if (!p)
					return alloc_degraded (vtable, size, TRUE);

				/* Allocate a new TLAB from the current nursery fragment */
				TLAB_START = (char*)p;
				TLAB_NEXT = TLAB_START;
				TLAB_REAL_END = TLAB_START + alloc_size;
				TLAB_TEMP_END = TLAB_START + MIN (SGEN_SCAN_START_SIZE, alloc_size);

				zero_tlab_if_necessary (TLAB_START, alloc_size);

				/* Allocate from the TLAB */
				p = (void **)TLAB_NEXT;
				TLAB_NEXT += size;
				sgen_set_nursery_scan_start ((char*)p);
			}
		} else {
			/* Reached tlab_temp_end */

			/* record the scan start so we can find pinned objects more easily */
			sgen_set_nursery_scan_start ((char*)p);
			/* we just bump tlab_temp_end as well */
			TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
			SGEN_LOG (5, "Expanding local alloc: %p-%p", TLAB_NEXT, TLAB_TEMP_END);
		}
		CANARIFY_ALLOC(p,real_size);
	}

	if (G_LIKELY (p)) {
		SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
		sgen_binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
		mono_atomic_store_seq (p, vtable);
	}

	return (GCObject*)p;
}
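The fast path of the allocator above is a bump-pointer allocation against the thread-local buffer: align the size, advance TLAB_NEXT, and only fall into the slow path when the soft limit TLAB_TEMP_END is crossed. A generic standalone rendering of that fast path (tlab, tlab_alloc_fast and the 8-byte alignment are illustrative assumptions, not Mono's definitions):

#include <stddef.h>

typedef struct {
	char *next;	/* next free byte in the thread-local buffer */
	char *temp_end;	/* soft limit: forces periodic scan-start bookkeeping */
	char *real_end;	/* hard end of the buffer */
} tlab;

#define ALIGNMENT 8
#define ALIGN_UP_SZ(s) (((s) + ALIGNMENT - 1) & ~((size_t) ALIGNMENT - 1))

/* Bump-pointer fast path mirroring the TLAB_NEXT/TLAB_TEMP_END check above.
 * Returns NULL when the caller has to take a slow path (refill the TLAB,
 * allocate straight from the nursery, or fall back to degraded allocation). */
static void *
tlab_alloc_fast (tlab *t, size_t size)
{
	size = ALIGN_UP_SZ (size);
	char *p = t->next;
	char *new_next = p + size;
	if (new_next < t->temp_end) {
		t->next = new_next;
		return p;
	}
	return NULL;
}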
Example #9
static gsize
sampling_thread_func (gpointer unused)
{
	MonoInternalThread *thread = mono_thread_internal_current ();

	thread->flags |= MONO_THREAD_FLAG_DONT_MANAGE;

	ERROR_DECL (error);

	MonoString *name = mono_string_new_checked (mono_get_root_domain (), "Profiler Sampler", error);
	mono_error_assert_ok (error);
	mono_thread_set_name_internal (thread, name, FALSE, FALSE, error);
	mono_error_assert_ok (error);

	mono_thread_info_set_flags (MONO_THREAD_INFO_FLAGS_NO_GC | MONO_THREAD_INFO_FLAGS_NO_SAMPLE);

	int old_policy;
	struct sched_param old_sched;
	pthread_getschedparam (pthread_self (), &old_policy, &old_sched);

	/*
	 * Attempt to switch the thread to real time scheduling. This will not
	 * necessarily work on all OSs; for example, most Linux systems will give
	 * us EPERM here unless configured to allow this.
	 *
	 * TODO: This does not work on Mac (and maybe some other OSs). On Mac, we
	 * have to use the Mach thread policy routines to switch to real-time
	 * scheduling. This is quite tricky as we need to specify how often we'll
	 * be doing work (easy), the normal processing time needed (also easy),
	 * and the maximum amount of processing time needed (hard). This is
	 * further complicated by the fact that if we misbehave and take too long
	 * to do our work, the kernel may knock us back down to the normal thread
	 * scheduling policy without telling us.
	 */
	struct sched_param sched = { .sched_priority = sched_get_priority_max (SCHED_FIFO) };
	pthread_setschedparam (pthread_self (), SCHED_FIFO, &sched);

	MonoProfilerSampleMode mode;

init:
	mono_profiler_get_sample_mode (NULL, &mode, NULL);

	if (mode == MONO_PROFILER_SAMPLE_MODE_NONE) {
		mono_profiler_sampling_thread_wait ();

		if (!mono_atomic_load_i32 (&sampling_thread_running))
			goto done;

		goto init;
	}

	clock_init (mode);

	for (guint64 sleep = clock_get_time_ns (); mono_atomic_load_i32 (&sampling_thread_running); clock_sleep_ns_abs (sleep)) {
		uint32_t freq;
		MonoProfilerSampleMode new_mode;

		mono_profiler_get_sample_mode (NULL, &new_mode, &freq);

		if (new_mode != mode) {
			clock_cleanup ();
			goto init;
		}

		sleep += 1000000000 / freq;

		FOREACH_THREAD_SAFE_EXCLUDE (info, MONO_THREAD_INFO_FLAGS_NO_SAMPLE) {
			g_assert (mono_thread_info_get_tid (info) != sampling_thread);

			/*
			 * Require an ack for the last sampling signal sent to the thread
			 * so that we don't overflow the signal queue, leading to all sorts
			 * of problems (e.g. GC STW failing).
			 */
			if (profiler_signal != SIGPROF && !mono_atomic_cas_i32 (&info->profiler_signal_ack, 0, 1))
				continue;

			mono_threads_pthread_kill (info, profiler_signal);
			mono_atomic_inc_i32 (&profiler_signals_sent);
		} FOREACH_THREAD_SAFE_END
	}