예제 #1
0
/*
 * Mark a given range of memory as invalid.
 *
 * This can be done either by zeroing memory or by placing
 * a phony byte[] array. This keeps the heap forward walkable.
 *
 * This function ignores calls with a zero range, even if
 * both start and end are NULL.
 */
void
sgen_clear_range (char *start, char *end)
{
	size_t size = end - start;

	if ((start && !end) || (start > end))
		g_error ("Invalid range [%p %p]", start, end);

	if (sgen_client_array_fill_range (start, size)) {
		sgen_set_nursery_scan_start (start);
		SGEN_ASSERT (0, start + sgen_safe_object_get_size ((GCObject*)start) == end, "Array fill produced wrong size");
	}
}
예제 #2
0
/*
 * Mark a given range of memory as invalid.
 *
 * This can be done either by zeroing memory or by placing
 * a phony byte[] array. This keeps the heap forward walkable.
 *
 * This function ignores calls with a zero range, even if
 * both start and end are NULL.
 */
void
sgen_clear_range (char *start, char *end)
{
	MonoArray *o;
	size_t size = end - start;

	if ((start && !end) || (start > end))
		g_error ("Invalid range [%p %p]", start, end);

	if (size < sizeof (MonoArray)) {
		memset (start, 0, size);
		return;
	}

	o = (MonoArray*)start;
	o->obj.vtable = sgen_get_array_fill_vtable ();
	/* Mark this as not a real object */
	o->obj.synchronisation = GINT_TO_POINTER (-1);
	o->bounds = NULL;
	o->max_length = (mono_array_size_t)(size - sizeof (MonoArray));
	sgen_set_nursery_scan_start (start);
	g_assert (start + sgen_safe_object_get_size ((MonoObject*)o) == end);
}
예제 #3
0
mword
sgen_build_nursery_fragments (GCMemSection *nursery_section, SgenGrayQueue *unpin_queue)
{
	char *frag_start, *frag_end;
	size_t frag_size;
	SgenFragment *frags_ranges;
	void **pin_start, **pin_entry, **pin_end;

#ifdef NALLOC_DEBUG
	reset_alloc_records ();
#endif
	/*The mutator fragments are done. We no longer need them. */
	sgen_fragment_allocator_release (&mutator_allocator);

	frag_start = sgen_nursery_start;
	fragment_total = 0;

	/* The current nursery might give us a fragment list to exclude [start, next[*/
	frags_ranges = sgen_minor_collector.build_fragments_get_exclude_head ();

	/* clear scan starts */
	memset (nursery_section->scan_starts, 0, nursery_section->num_scan_start * sizeof (gpointer));

	pin_start = pin_entry = sgen_pinning_get_entry (nursery_section->pin_queue_first_entry);
	pin_end = sgen_pinning_get_entry (nursery_section->pin_queue_last_entry);

	while (pin_entry < pin_end || frags_ranges) {
		char *addr0, *addr1;
		size_t size;

		addr0 = addr1 = sgen_nursery_end;
		if (pin_entry < pin_end)
			addr0 = (char *)*pin_entry;
		if (frags_ranges)
			addr1 = frags_ranges->fragment_start;

		if (addr0 < addr1) {
			if (unpin_queue)
				GRAY_OBJECT_ENQUEUE (unpin_queue, (GCObject*)addr0, sgen_obj_get_descriptor_safe ((GCObject*)addr0));
			else
				SGEN_UNPIN_OBJECT (addr0);
			size = SGEN_ALIGN_UP (sgen_safe_object_get_size ((GCObject*)addr0));
			CANARIFY_SIZE (size);
			sgen_set_nursery_scan_start (addr0);
			frag_end = addr0;
			++pin_entry;
		} else {
			frag_end = addr1;
			size = frags_ranges->fragment_next - addr1;
			frags_ranges = frags_ranges->next_in_order;
		}

		frag_size = frag_end - frag_start;

		if (size == 0)
			continue;

		g_assert (frag_size >= 0);
		g_assert (size > 0);
		if (frag_size && size)
			add_nursery_frag (&mutator_allocator, frag_size, frag_start, frag_end);	

		frag_size = size;
#ifdef NALLOC_DEBUG
		add_alloc_record (*pin_entry, frag_size, PINNING);
#endif
		frag_start = frag_end + frag_size;
	}

	nursery_last_pinned_end = frag_start;
	frag_end = sgen_nursery_end;
	frag_size = frag_end - frag_start;
	if (frag_size)
		add_nursery_frag (&mutator_allocator, frag_size, frag_start, frag_end);

	/* Now it's safe to release the fragments exclude list. */
	sgen_minor_collector.build_fragments_release_exclude_head ();

	/* First we reorder the fragment list to be in ascending address order. This makes H/W prefetchers happier. */
	fragment_list_reverse (&mutator_allocator);

	/*The collector might want to do something with the final nursery fragment list.*/
	sgen_minor_collector.build_fragments_finish (&mutator_allocator);

	if (!unmask (mutator_allocator.alloc_head)) {
		SGEN_LOG (1, "Nursery fully pinned");
		for (pin_entry = pin_start; pin_entry < pin_end; ++pin_entry) {
			GCObject *p = (GCObject *)*pin_entry;
			SGEN_LOG (3, "Bastard pinning obj %p (%s), size: %zd", p, sgen_client_vtable_get_name (SGEN_LOAD_VTABLE (p)), sgen_safe_object_get_size (p));
		}
	}
	return fragment_total;
}
예제 #4
0
파일: sgen-alloc.c 프로젝트: Lavesson/mono
static void*
mono_gc_try_alloc_obj_nolock (MonoVTable *vtable, size_t size)
{
	void **p;
	char *new_next;
	TLAB_ACCESS_INIT;

	size = ALIGN_UP (size);

	g_assert (vtable->gc_descr);
	if (size > SGEN_MAX_SMALL_OBJ_SIZE)
		return NULL;

	if (G_UNLIKELY (size > tlab_size)) {
		/* Allocate directly from the nursery */
		p = sgen_nursery_alloc (size);
		if (!p)
			return NULL;
		sgen_set_nursery_scan_start ((char*)p);

		/*FIXME we should use weak memory ops here. Should help specially on x86. */
		if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
			memset (p, 0, size);
	} else {
		int available_in_tlab;
		char *real_end;
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;

		real_end = TLAB_REAL_END;
		available_in_tlab = real_end - (char*)p;

		if (G_LIKELY (new_next < real_end)) {
			TLAB_NEXT = new_next;

			/* Second case, we overflowed temp end */
			if (G_UNLIKELY (new_next >= TLAB_TEMP_END)) {
				sgen_set_nursery_scan_start (new_next);
				/* we just bump tlab_temp_end as well */
				TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
				DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", TLAB_NEXT, TLAB_TEMP_END));		
			}
		} else if (available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
			/* Allocate directly from the nursery */
			p = sgen_nursery_alloc (size);
			if (!p)
				return NULL;

			if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
				memset (p, 0, size);			
		} else {
			size_t alloc_size = 0;

			sgen_nursery_retire_region (p, available_in_tlab);
			new_next = sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
			p = (void**)new_next;
			if (!p)
				return NULL;

			TLAB_START = (char*)new_next;
			TLAB_NEXT = new_next + size;
			TLAB_REAL_END = new_next + alloc_size;
			TLAB_TEMP_END = new_next + MIN (SGEN_SCAN_START_SIZE, alloc_size);
			sgen_set_nursery_scan_start ((char*)p);

			if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
				memset (new_next, 0, alloc_size);

			MONO_GC_NURSERY_TLAB_ALLOC ((mword)new_next, alloc_size);
		}
	}

	HEAVY_STAT (++stat_objects_alloced);
	HEAVY_STAT (stat_bytes_alloced += size);

	DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
	binary_protocol_alloc (p, vtable, size);
	if (G_UNLIKELY (MONO_GC_NURSERY_OBJ_ALLOC_ENABLED ()))
		MONO_GC_NURSERY_OBJ_ALLOC ((mword)p, size, vtable->klass->name_space, vtable->klass->name);
	g_assert (*p == NULL); /* FIXME disable this in non debug builds */

	mono_atomic_store_seq (p, vtable);

	return p;
}
예제 #5
0
파일: sgen-alloc.c 프로젝트: Lavesson/mono
/*
 * Provide a variant that takes just the vtable for small fixed-size objects.
 * The aligned size is already computed and stored in vt->gc_descr.
 * Note: every SGEN_SCAN_START_SIZE or so we are given the chance to do some special
 * processing. We can keep track of where objects start, for example,
 * so when we scan the thread stacks for pinned objects, we can start
 * a search for the pinned object in SGEN_SCAN_START_SIZE chunks.
 */
static void*
mono_gc_alloc_obj_nolock (MonoVTable *vtable, size_t size)
{
	/* FIXME: handle OOM */
	void **p;
	char *new_next;
	TLAB_ACCESS_INIT;

	HEAVY_STAT (++stat_objects_alloced);
	if (size <= SGEN_MAX_SMALL_OBJ_SIZE)
		HEAVY_STAT (stat_bytes_alloced += size);
	else
		HEAVY_STAT (stat_bytes_alloced_los += size);

	size = ALIGN_UP (size);

	g_assert (vtable->gc_descr);

	if (G_UNLIKELY (has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = InterlockedIncrement (&alloc_count);

		if (collect_before_allocs) {
			if (((current_alloc % collect_before_allocs) == 0) && nursery_section) {
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered");
				if (!degraded_mode && sgen_can_alloc_size (size) && size <= SGEN_MAX_SMALL_OBJ_SIZE) {
					// FIXME:
					g_assert_not_reached ();
				}
			}
		} else if (verify_before_allocs) {
			if ((current_alloc % verify_before_allocs) == 0)
				sgen_check_whole_heap_stw ();
		}
	}

	/*
	 * We must already have the lock here instead of after the
	 * fast path because we might be interrupted in the fast path
	 * (after confirming that new_next < TLAB_TEMP_END) by the GC,
	 * and we'll end up allocating an object in a fragment which
	 * no longer belongs to us.
	 *
	 * The managed allocator does not do this, but it's treated
	 * specially by the world-stopping code.
	 */

	if (size > SGEN_MAX_SMALL_OBJ_SIZE) {
		p = sgen_los_alloc_large_inner (vtable, size);
	} else {
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;
		TLAB_NEXT = new_next;

		if (G_LIKELY (new_next < TLAB_TEMP_END)) {
			/* Fast path */

			/* 
			 * FIXME: We might need a memory barrier here so the change to tlab_next is 
			 * visible before the vtable store.
			 */

			DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
			binary_protocol_alloc (p , vtable, size);
			if (G_UNLIKELY (MONO_GC_NURSERY_OBJ_ALLOC_ENABLED ()))
				MONO_GC_NURSERY_OBJ_ALLOC ((mword)p, size, vtable->klass->name_space, vtable->klass->name);
			g_assert (*p == NULL);
			mono_atomic_store_seq (p, vtable);

			return p;
		}

		/* Slow path */

		/* there are two cases: the object is too big or we run out of space in the TLAB */
		/* we also reach here when the thread does its first allocation after a minor 
		 * collection, since the tlab_ variables are initialized to NULL.
		 * there can be another case (from ORP), if we cooperate with the runtime a bit:
		 * objects that need finalizers can have the high bit set in their size
		 * so the above check fails and we can readily add the object to the queue.
		 * This avoids taking again the GC lock when registering, but this is moot when
		 * doing thread-local allocation, so it may not be a good idea.
		 */
		if (TLAB_NEXT >= TLAB_REAL_END) {
			int available_in_tlab;
			/* 
			 * Run out of space in the TLAB. When this happens, some amount of space
			 * remains in the TLAB, but not enough to satisfy the current allocation
			 * request. Currently, we retire the TLAB in all cases, later we could
			 * keep it if the remaining space is above a treshold, and satisfy the
			 * allocation directly from the nursery.
			 */
			TLAB_NEXT -= size;
			/* when running in degraded mode, we continue allocing that way
			 * for a while, to decrease the number of useless nursery collections.
			 */
			if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE)
				return alloc_degraded (vtable, size, FALSE);

			available_in_tlab = TLAB_REAL_END - TLAB_NEXT;
			if (size > tlab_size || available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
				/* Allocate directly from the nursery */
				do {
					p = sgen_nursery_alloc (size);
					if (!p) {
						sgen_ensure_free_space (size);
						if (degraded_mode)
							return alloc_degraded (vtable, size, FALSE);
						else
							p = sgen_nursery_alloc (size);
					}
				} while (!p);
				if (!p) {
					// no space left
					g_assert (0);
				}

				if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
					memset (p, 0, size);
				}
			} else {
				size_t alloc_size = 0;
				if (TLAB_START)
					DEBUG (3, fprintf (gc_debug_file, "Retire TLAB: %p-%p [%ld]\n", TLAB_START, TLAB_REAL_END, (long)(TLAB_REAL_END - TLAB_NEXT - size)));
				sgen_nursery_retire_region (p, available_in_tlab);

				do {
					p = sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
					if (!p) {
						sgen_ensure_free_space (tlab_size);
						if (degraded_mode)
							return alloc_degraded (vtable, size, FALSE);
						else
							p = sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
					}
				} while (!p);
					
				if (!p) {
					// no space left
					g_assert (0);
				}

				/* Allocate a new TLAB from the current nursery fragment */
				TLAB_START = (char*)p;
				TLAB_NEXT = TLAB_START;
				TLAB_REAL_END = TLAB_START + alloc_size;
				TLAB_TEMP_END = TLAB_START + MIN (SGEN_SCAN_START_SIZE, alloc_size);

				if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
					memset (TLAB_START, 0, alloc_size);
				}

				/* Allocate from the TLAB */
				p = (void*)TLAB_NEXT;
				TLAB_NEXT += size;
				sgen_set_nursery_scan_start ((char*)p);
			}
		} else {
			/* Reached tlab_temp_end */

			/* record the scan start so we can find pinned objects more easily */
			sgen_set_nursery_scan_start ((char*)p);
			/* we just bump tlab_temp_end as well */
			TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
			DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", TLAB_NEXT, TLAB_TEMP_END));
		}
	}

	if (G_LIKELY (p)) {
		DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
		binary_protocol_alloc (p, vtable, size);
		if (G_UNLIKELY (MONO_GC_MAJOR_OBJ_ALLOC_LARGE_ENABLED ()|| MONO_GC_NURSERY_OBJ_ALLOC_ENABLED ())) {
			if (size > SGEN_MAX_SMALL_OBJ_SIZE)
				MONO_GC_MAJOR_OBJ_ALLOC_LARGE ((mword)p, size, vtable->klass->name_space, vtable->klass->name);
			else
				MONO_GC_NURSERY_OBJ_ALLOC ((mword)p, size, vtable->klass->name_space, vtable->klass->name);
		}
		mono_atomic_store_seq (p, vtable);
	}

	return p;
}
예제 #6
0
mword
sgen_build_nursery_fragments (GCMemSection *nursery_section, void **start, size_t num_entries, SgenGrayQueue *unpin_queue)
{
	char *frag_start, *frag_end;
	size_t frag_size;
	size_t i = 0;
	SgenFragment *frags_ranges;

#ifdef NALLOC_DEBUG
	reset_alloc_records ();
#endif
	/*The mutator fragments are done. We no longer need them. */
	sgen_fragment_allocator_release (&mutator_allocator);

	frag_start = sgen_nursery_start;
	fragment_total = 0;

	/* The current nursery might give us a fragment list to exclude [start, next[*/
	frags_ranges = sgen_minor_collector.build_fragments_get_exclude_head ();

	/* clear scan starts */
	memset (nursery_section->scan_starts, 0, nursery_section->num_scan_start * sizeof (gpointer));

	while (i < num_entries || frags_ranges) {
		char *addr0, *addr1;
		size_t size;
		SgenFragment *last_frag = NULL;

		addr0 = addr1 = sgen_nursery_end;
		if (i < num_entries)
			addr0 = start [i];
		if (frags_ranges)
			addr1 = frags_ranges->fragment_start;

		if (addr0 < addr1) {
			if (unpin_queue)
				GRAY_OBJECT_ENQUEUE (unpin_queue, addr0);
			else
				SGEN_UNPIN_OBJECT (addr0);
			sgen_set_nursery_scan_start (addr0);
			frag_end = addr0;
			size = SGEN_ALIGN_UP (sgen_safe_object_get_size ((MonoObject*)addr0));
			++i;
		} else {
			frag_end = addr1;
			size = frags_ranges->fragment_next - addr1;
			last_frag = frags_ranges;
			frags_ranges = frags_ranges->next_in_order;
		}

		frag_size = frag_end - frag_start;

		if (size == 0)
			continue;

		g_assert (frag_size >= 0);
		g_assert (size > 0);
		if (frag_size && size)
			add_nursery_frag (&mutator_allocator, frag_size, frag_start, frag_end);	

		frag_size = size;
#ifdef NALLOC_DEBUG
		add_alloc_record (start [i], frag_size, PINNING);
#endif
		frag_start = frag_end + frag_size;
	}

	nursery_last_pinned_end = frag_start;
	frag_end = sgen_nursery_end;
	frag_size = frag_end - frag_start;
	if (frag_size)
		add_nursery_frag (&mutator_allocator, frag_size, frag_start, frag_end);

	/* Now it's safe to release the fragments exclude list. */
	sgen_minor_collector.build_fragments_release_exclude_head ();

	/* First we reorder the fragment list to be in ascending address order. This makes H/W prefetchers happier. */
	fragment_list_reverse (&mutator_allocator);

	/*The collector might want to do something with the final nursery fragment list.*/
	sgen_minor_collector.build_fragments_finish (&mutator_allocator);

	if (!unmask (mutator_allocator.alloc_head)) {
		SGEN_LOG (1, "Nursery fully pinned (%zd)", num_entries);
		for (i = 0; i < num_entries; ++i) {
			SGEN_LOG (3, "Bastard pinning obj %p (%s), size: %zd", start [i], sgen_safe_name (start [i]), sgen_safe_object_get_size (start [i]));
		}
	}
	return fragment_total;
}
예제 #7
0
GCObject*
sgen_try_alloc_obj_nolock (GCVTable vtable, size_t size)
{
	void **p;
	char *new_next;
	size_t real_size = size;
	TLAB_ACCESS_INIT;

	CANARIFY_SIZE(size);

	size = ALIGN_UP (size);
	SGEN_ASSERT (9, real_size >= SGEN_CLIENT_MINIMUM_OBJECT_SIZE, "Object too small");

	SGEN_ASSERT (6, sgen_vtable_get_descriptor (vtable), "VTable without descriptor");

	if (real_size > SGEN_MAX_SMALL_OBJ_SIZE)
		return NULL;

	if (G_UNLIKELY (size > sgen_tlab_size)) {
		/* Allocate directly from the nursery */
		p = (void **)sgen_nursery_alloc (size);
		if (!p)
			return NULL;
		sgen_set_nursery_scan_start ((char*)p);

		/*FIXME we should use weak memory ops here. Should help specially on x86. */
		zero_tlab_if_necessary (p, size);
	} else {
		int available_in_tlab;
		char *real_end;
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;

		real_end = TLAB_REAL_END;
		available_in_tlab = (int)(real_end - (char*)p);//We'll never have tlabs > 2Gb

		if (G_LIKELY (new_next < real_end)) {
			TLAB_NEXT = new_next;

			/* Second case, we overflowed temp end */
			if (G_UNLIKELY (new_next >= TLAB_TEMP_END)) {
				sgen_set_nursery_scan_start (new_next);
				/* we just bump tlab_temp_end as well */
				TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
				SGEN_LOG (5, "Expanding local alloc: %p-%p", TLAB_NEXT, TLAB_TEMP_END);
			}
		} else if (available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
			/* Allocate directly from the nursery */
			p = (void **)sgen_nursery_alloc (size);
			if (!p)
				return NULL;

			zero_tlab_if_necessary (p, size);
		} else {
			size_t alloc_size = 0;

			sgen_nursery_retire_region (p, available_in_tlab);
			new_next = (char *)sgen_nursery_alloc_range (sgen_tlab_size, size, &alloc_size);
			p = (void**)new_next;
			if (!p)
				return NULL;

			TLAB_START = (char*)new_next;
			TLAB_NEXT = new_next + size;
			TLAB_REAL_END = new_next + alloc_size;
			TLAB_TEMP_END = new_next + MIN (SGEN_SCAN_START_SIZE, alloc_size);
			sgen_set_nursery_scan_start ((char*)p);

			zero_tlab_if_necessary (new_next, alloc_size);
		}
	}

	HEAVY_STAT (++stat_objects_alloced);
	HEAVY_STAT (stat_bytes_alloced += size);

	CANARIFY_ALLOC(p,real_size);
	SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
	sgen_binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
	g_assert (*p == NULL); /* FIXME disable this in non debug builds */

	mono_atomic_store_seq (p, vtable);

	return (GCObject*)p;
}
예제 #8
0
/*
 * Provide a variant that takes just the vtable for small fixed-size objects.
 * The aligned size is already computed and stored in vt->gc_descr.
 * Note: every SGEN_SCAN_START_SIZE or so we are given the chance to do some special
 * processing. We can keep track of where objects start, for example,
 * so when we scan the thread stacks for pinned objects, we can start
 * a search for the pinned object in SGEN_SCAN_START_SIZE chunks.
 */
GCObject*
sgen_alloc_obj_nolock (GCVTable vtable, size_t size)
{
	/* FIXME: handle OOM */
	void **p;
	char *new_next;
	size_t real_size = size;
	TLAB_ACCESS_INIT;
	
	CANARIFY_SIZE(size);

	HEAVY_STAT (++stat_objects_alloced);
	if (real_size <= SGEN_MAX_SMALL_OBJ_SIZE)
		HEAVY_STAT (stat_bytes_alloced += size);
	else
		HEAVY_STAT (stat_bytes_alloced_los += size);

	size = ALIGN_UP (size);

	SGEN_ASSERT (6, sgen_vtable_get_descriptor (vtable), "VTable without descriptor");

	if (G_UNLIKELY (sgen_has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = mono_atomic_inc_i32 (&alloc_count);

		if (sgen_collect_before_allocs) {
			if (((current_alloc % sgen_collect_before_allocs) == 0) && sgen_nursery_section) {
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered", TRUE, TRUE);
				if (!sgen_degraded_mode && sgen_can_alloc_size (size) && real_size <= SGEN_MAX_SMALL_OBJ_SIZE) {
					// FIXME:
					g_assert_not_reached ();
				}
			}
		} else if (sgen_verify_before_allocs) {
			if ((current_alloc % sgen_verify_before_allocs) == 0)
				sgen_check_whole_heap_stw ();
		}
	}

	/*
	 * We must already have the lock here instead of after the
	 * fast path because we might be interrupted in the fast path
	 * (after confirming that new_next < TLAB_TEMP_END) by the GC,
	 * and we'll end up allocating an object in a fragment which
	 * no longer belongs to us.
	 *
	 * The managed allocator does not do this, but it's treated
	 * specially by the world-stopping code.
	 */

	if (real_size > SGEN_MAX_SMALL_OBJ_SIZE) {
		p = (void **)sgen_los_alloc_large_inner (vtable, ALIGN_UP (real_size));
	} else {
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;
		TLAB_NEXT = new_next;

		if (G_LIKELY (new_next < TLAB_TEMP_END)) {
			/* Fast path */

			CANARIFY_ALLOC(p,real_size);
			SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
			sgen_binary_protocol_alloc (p , vtable, size, sgen_client_get_provenance ());
			g_assert (*p == NULL);
			mono_atomic_store_seq (p, vtable);

			return (GCObject*)p;
		}

		/* Slow path */

		/* there are two cases: the object is too big or we run out of space in the TLAB */
		/* we also reach here when the thread does its first allocation after a minor 
		 * collection, since the tlab_ variables are initialized to NULL.
		 * there can be another case (from ORP), if we cooperate with the runtime a bit:
		 * objects that need finalizers can have the high bit set in their size
		 * so the above check fails and we can readily add the object to the queue.
		 * This avoids taking again the GC lock when registering, but this is moot when
		 * doing thread-local allocation, so it may not be a good idea.
		 */
		if (TLAB_NEXT >= TLAB_REAL_END) {
			int available_in_tlab;
			/* 
			 * Run out of space in the TLAB. When this happens, some amount of space
			 * remains in the TLAB, but not enough to satisfy the current allocation
			 * request. Currently, we retire the TLAB in all cases, later we could
			 * keep it if the remaining space is above a treshold, and satisfy the
			 * allocation directly from the nursery.
			 */
			TLAB_NEXT -= size;
			/* when running in degraded mode, we continue allocing that way
			 * for a while, to decrease the number of useless nursery collections.
			 */
			if (sgen_degraded_mode && sgen_degraded_mode < sgen_nursery_size)
				return alloc_degraded (vtable, size, FALSE);

			available_in_tlab = (int)(TLAB_REAL_END - TLAB_NEXT);//We'll never have tlabs > 2Gb
			if (size > sgen_tlab_size || available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
				/* Allocate directly from the nursery */
				p = (void **)sgen_nursery_alloc (size);
				if (!p) {
					/*
					 * We couldn't allocate from the nursery, so we try
					 * collecting.  Even after the collection, we might
					 * still not have enough memory to allocate the
					 * object.  The reason will most likely be that we've
					 * run out of memory, but there is the theoretical
					 * possibility that other threads might have consumed
					 * the freed up memory ahead of us.
					 *
					 * What we do in this case is allocate degraded, i.e.,
					 * from the major heap.
					 *
					 * Ideally we'd like to detect the case of other
					 * threads allocating ahead of us and loop (if we
					 * always loop we will loop endlessly in the case of
					 * OOM).
					 */
					sgen_ensure_free_space (real_size, GENERATION_NURSERY);
					if (!sgen_degraded_mode)
						p = (void **)sgen_nursery_alloc (size);
				}
				if (!p)
					return alloc_degraded (vtable, size, TRUE);

				zero_tlab_if_necessary (p, size);
			} else {
				size_t alloc_size = 0;
				if (TLAB_START)
					SGEN_LOG (3, "Retire TLAB: %p-%p [%ld]", TLAB_START, TLAB_REAL_END, (long)(TLAB_REAL_END - TLAB_NEXT - size));
				sgen_nursery_retire_region (p, available_in_tlab);

				p = (void **)sgen_nursery_alloc_range (sgen_tlab_size, size, &alloc_size);
				if (!p) {
					/* See comment above in similar case. */
					sgen_ensure_free_space (sgen_tlab_size, GENERATION_NURSERY);
					if (!sgen_degraded_mode)
						p = (void **)sgen_nursery_alloc_range (sgen_tlab_size, size, &alloc_size);
				}
				if (!p)
					return alloc_degraded (vtable, size, TRUE);

				/* Allocate a new TLAB from the current nursery fragment */
				TLAB_START = (char*)p;
				TLAB_NEXT = TLAB_START;
				TLAB_REAL_END = TLAB_START + alloc_size;
				TLAB_TEMP_END = TLAB_START + MIN (SGEN_SCAN_START_SIZE, alloc_size);

				zero_tlab_if_necessary (TLAB_START, alloc_size);

				/* Allocate from the TLAB */
				p = (void **)TLAB_NEXT;
				TLAB_NEXT += size;
				sgen_set_nursery_scan_start ((char*)p);
			}
		} else {
			/* Reached tlab_temp_end */

			/* record the scan start so we can find pinned objects more easily */
			sgen_set_nursery_scan_start ((char*)p);
			/* we just bump tlab_temp_end as well */
			TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
			SGEN_LOG (5, "Expanding local alloc: %p-%p", TLAB_NEXT, TLAB_TEMP_END);
		}
		CANARIFY_ALLOC(p,real_size);
	}

	if (G_LIKELY (p)) {
		SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
		sgen_binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
		mono_atomic_store_seq (p, vtable);
	}

	return (GCObject*)p;
}