static void*
mono_gc_try_alloc_obj_nolock (MonoVTable *vtable, size_t size)
{
	void **p;
	char *new_next;
	TLAB_ACCESS_INIT;

	size = ALIGN_UP (size);
	g_assert (vtable->gc_descr);

	if (size > SGEN_MAX_SMALL_OBJ_SIZE)
		return NULL;

	if (G_UNLIKELY (size > tlab_size)) {
		/* Allocate directly from the nursery */
		p = sgen_nursery_alloc (size);
		if (!p)
			return NULL;
		sgen_set_nursery_scan_start ((char*)p);

		/* FIXME: we should use weak memory ops here. Should help especially on x86. */
		if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
			memset (p, 0, size);
	} else {
		int available_in_tlab;
		char *real_end;
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;

		real_end = TLAB_REAL_END;
		available_in_tlab = real_end - (char*)p;

		if (G_LIKELY (new_next < real_end)) {
			TLAB_NEXT = new_next;

			/* Second case, we overflowed temp end */
			if (G_UNLIKELY (new_next >= TLAB_TEMP_END)) {
				sgen_set_nursery_scan_start (new_next);
				/* we just bump tlab_temp_end as well */
				TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
				DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", TLAB_NEXT, TLAB_TEMP_END));
			}
		} else if (available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
			/* Allocate directly from the nursery */
			p = sgen_nursery_alloc (size);
			if (!p)
				return NULL;

			if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
				memset (p, 0, size);
		} else {
			size_t alloc_size = 0;

			sgen_nursery_retire_region (p, available_in_tlab);
			new_next = sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
			p = (void**)new_next;
			if (!p)
				return NULL;

			TLAB_START = (char*)new_next;
			TLAB_NEXT = new_next + size;
			TLAB_REAL_END = new_next + alloc_size;
			TLAB_TEMP_END = new_next + MIN (SGEN_SCAN_START_SIZE, alloc_size);
			sgen_set_nursery_scan_start ((char*)p);

			if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
				memset (new_next, 0, alloc_size);

			MONO_GC_NURSERY_TLAB_ALLOC ((mword)new_next, alloc_size);
		}
	}

	HEAVY_STAT (++stat_objects_alloced);
	HEAVY_STAT (stat_bytes_alloced += size);

	DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
	binary_protocol_alloc (p, vtable, size);
	if (G_UNLIKELY (MONO_GC_NURSERY_OBJ_ALLOC_ENABLED ()))
		MONO_GC_NURSERY_OBJ_ALLOC ((mword)p, size, vtable->klass->name_space, vtable->klass->name);
	g_assert (*p == NULL); /* FIXME: disable this in non-debug builds */

	mono_atomic_store_seq (p, vtable);

	return p;
}
GCObject*
sgen_try_alloc_obj_nolock (GCVTable vtable, size_t size)
{
	void **p;
	char *new_next;
	size_t real_size = size;
	TLAB_ACCESS_INIT;

	CANARIFY_SIZE(size);

	size = ALIGN_UP (size);
	SGEN_ASSERT (9, real_size >= SGEN_CLIENT_MINIMUM_OBJECT_SIZE, "Object too small");

	SGEN_ASSERT (6, sgen_vtable_get_descriptor (vtable), "VTable without descriptor");

	if (real_size > SGEN_MAX_SMALL_OBJ_SIZE)
		return NULL;

	if (G_UNLIKELY (size > sgen_tlab_size)) {
		/* Allocate directly from the nursery */
		p = (void **)sgen_nursery_alloc (size);
		if (!p)
			return NULL;
		sgen_set_nursery_scan_start ((char*)p);

		/* FIXME: we should use weak memory ops here. Should help especially on x86. */
		zero_tlab_if_necessary (p, size);
	} else {
		int available_in_tlab;
		char *real_end;
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;

		real_end = TLAB_REAL_END;
		available_in_tlab = (int)(real_end - (char*)p); // We'll never have TLABs > 2 GB

		if (G_LIKELY (new_next < real_end)) {
			TLAB_NEXT = new_next;

			/* Second case, we overflowed temp end */
			if (G_UNLIKELY (new_next >= TLAB_TEMP_END)) {
				sgen_set_nursery_scan_start (new_next);
				/* we just bump tlab_temp_end as well */
				TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
				SGEN_LOG (5, "Expanding local alloc: %p-%p", TLAB_NEXT, TLAB_TEMP_END);
			}
		} else if (available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
			/* Allocate directly from the nursery */
			p = (void **)sgen_nursery_alloc (size);
			if (!p)
				return NULL;

			zero_tlab_if_necessary (p, size);
		} else {
			size_t alloc_size = 0;

			sgen_nursery_retire_region (p, available_in_tlab);
			new_next = (char *)sgen_nursery_alloc_range (sgen_tlab_size, size, &alloc_size);
			p = (void**)new_next;
			if (!p)
				return NULL;

			TLAB_START = (char*)new_next;
			TLAB_NEXT = new_next + size;
			TLAB_REAL_END = new_next + alloc_size;
			TLAB_TEMP_END = new_next + MIN (SGEN_SCAN_START_SIZE, alloc_size);
			sgen_set_nursery_scan_start ((char*)p);

			zero_tlab_if_necessary (new_next, alloc_size);
		}
	}

	HEAVY_STAT (++stat_objects_alloced);
	HEAVY_STAT (stat_bytes_alloced += size);

	CANARIFY_ALLOC(p,real_size);
	SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
	sgen_binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
	g_assert (*p == NULL); /* FIXME: disable this in non-debug builds */

	mono_atomic_store_seq (p, vtable);

	return (GCObject*)p;
}
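/*
 * A minimal, self-contained sketch of the bump-pointer TLAB fast path that both
 * try-alloc variants above implement.  The struct and names below (tlab_t,
 * tlab_try_alloc, SCAN_START_SIZE) are hypothetical stand-ins, not SGen's API;
 * they only illustrate the idea: an allocation succeeds by advancing "next",
 * crossing "temp_end" is the hook for scan-start bookkeeping, and crossing
 * "real_end" means the TLAB is exhausted and must be retired and refilled.
 */
#include <stddef.h>

#define SCAN_START_SIZE (4096)                 /* stand-in for SGEN_SCAN_START_SIZE */
#define OBJ_ALIGN 8
#define ALIGN_UP_(s) (((s) + (OBJ_ALIGN - 1)) & ~(size_t)(OBJ_ALIGN - 1))

typedef struct {
	char *start;     /* first byte of the TLAB            (TLAB_START)    */
	char *next;      /* bump pointer                      (TLAB_NEXT)     */
	char *temp_end;  /* next scan-start boundary          (TLAB_TEMP_END) */
	char *real_end;  /* one past the last usable byte     (TLAB_REAL_END) */
} tlab_t;

/* Returns the object address, or NULL if this TLAB cannot satisfy the request. */
static void *
tlab_try_alloc (tlab_t *t, size_t size)
{
	char *p, *new_next;

	size = ALIGN_UP_ (size);
	p = t->next;
	new_next = p + size;
	if (new_next >= t->real_end)
		return NULL;                    /* caller retires the TLAB and grabs a new one */

	t->next = new_next;
	if (new_next >= t->temp_end) {
		/* Crossed a scan-start boundary: the real code records it via
		 * sgen_set_nursery_scan_start() before bumping temp_end. */
		t->temp_end = new_next + SCAN_START_SIZE;
		if (t->temp_end > t->real_end)
			t->temp_end = t->real_end;
	}
	return p;
}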
/*
 * Provide a variant that takes just the vtable for small fixed-size objects.
 * The aligned size is already computed and stored in vt->gc_descr.
 * Note: every SGEN_SCAN_START_SIZE or so we are given the chance to do some special
 * processing. We can keep track of where objects start, for example,
 * so when we scan the thread stacks for pinned objects, we can start
 * a search for the pinned object in SGEN_SCAN_START_SIZE chunks.
 */
static void*
mono_gc_alloc_obj_nolock (MonoVTable *vtable, size_t size)
{
	/* FIXME: handle OOM */
	void **p;
	char *new_next;
	TLAB_ACCESS_INIT;

	HEAVY_STAT (++stat_objects_alloced);
	if (size <= SGEN_MAX_SMALL_OBJ_SIZE)
		HEAVY_STAT (stat_bytes_alloced += size);
	else
		HEAVY_STAT (stat_bytes_alloced_los += size);

	size = ALIGN_UP (size);

	g_assert (vtable->gc_descr);

	if (G_UNLIKELY (has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = InterlockedIncrement (&alloc_count);

		if (collect_before_allocs) {
			if (((current_alloc % collect_before_allocs) == 0) && nursery_section) {
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered");
				if (!degraded_mode && sgen_can_alloc_size (size) && size <= SGEN_MAX_SMALL_OBJ_SIZE) {
					// FIXME:
					g_assert_not_reached ();
				}
			}
		} else if (verify_before_allocs) {
			if ((current_alloc % verify_before_allocs) == 0)
				sgen_check_whole_heap_stw ();
		}
	}

	/*
	 * We must already have the lock here instead of after the
	 * fast path because we might be interrupted in the fast path
	 * (after confirming that new_next < TLAB_TEMP_END) by the GC,
	 * and we'll end up allocating an object in a fragment which
	 * no longer belongs to us.
	 *
	 * The managed allocator does not do this, but it's treated
	 * specially by the world-stopping code.
	 */

	if (size > SGEN_MAX_SMALL_OBJ_SIZE) {
		p = sgen_los_alloc_large_inner (vtable, size);
	} else {
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;
		TLAB_NEXT = new_next;

		if (G_LIKELY (new_next < TLAB_TEMP_END)) {
			/* Fast path */

			/*
			 * FIXME: We might need a memory barrier here so the change to tlab_next is
			 * visible before the vtable store.
			 */

			DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
			binary_protocol_alloc (p, vtable, size);
			if (G_UNLIKELY (MONO_GC_NURSERY_OBJ_ALLOC_ENABLED ()))
				MONO_GC_NURSERY_OBJ_ALLOC ((mword)p, size, vtable->klass->name_space, vtable->klass->name);
			g_assert (*p == NULL);
			mono_atomic_store_seq (p, vtable);

			return p;
		}

		/* Slow path */

		/* there are two cases: the object is too big or we ran out of space in the TLAB */
		/* we also reach here when the thread does its first allocation after a minor
		 * collection, since the tlab_ variables are initialized to NULL.
		 * there can be another case (from ORP), if we cooperate with the runtime a bit:
		 * objects that need finalizers can have the high bit set in their size
		 * so the above check fails and we can readily add the object to the queue.
		 * This avoids taking the GC lock again when registering, but this is moot when
		 * doing thread-local allocation, so it may not be a good idea.
		 */
		if (TLAB_NEXT >= TLAB_REAL_END) {
			int available_in_tlab;
			/*
			 * Ran out of space in the TLAB. When this happens, some amount of space
			 * remains in the TLAB, but not enough to satisfy the current allocation
			 * request. Currently, we retire the TLAB in all cases; later we could
			 * keep it if the remaining space is above a threshold, and satisfy the
			 * allocation directly from the nursery.
			 */
			TLAB_NEXT -= size;
			/* when running in degraded mode, we continue allocating that way
			 * for a while, to decrease the number of useless nursery collections.
			 */
			if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE)
				return alloc_degraded (vtable, size, FALSE);

			available_in_tlab = TLAB_REAL_END - TLAB_NEXT;
			if (size > tlab_size || available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
				/* Allocate directly from the nursery */
				do {
					p = sgen_nursery_alloc (size);
					if (!p) {
						sgen_ensure_free_space (size);
						if (degraded_mode)
							return alloc_degraded (vtable, size, FALSE);
						else
							p = sgen_nursery_alloc (size);
					}
				} while (!p);

				if (!p) {
					// no space left
					g_assert (0);
				}

				if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
					memset (p, 0, size);
				}
			} else {
				size_t alloc_size = 0;
				if (TLAB_START)
					DEBUG (3, fprintf (gc_debug_file, "Retire TLAB: %p-%p [%ld]\n", TLAB_START, TLAB_REAL_END, (long)(TLAB_REAL_END - TLAB_NEXT - size)));
				sgen_nursery_retire_region (p, available_in_tlab);

				do {
					p = sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
					if (!p) {
						sgen_ensure_free_space (tlab_size);
						if (degraded_mode)
							return alloc_degraded (vtable, size, FALSE);
						else
							p = sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
					}
				} while (!p);

				if (!p) {
					// no space left
					g_assert (0);
				}

				/* Allocate a new TLAB from the current nursery fragment */
				TLAB_START = (char*)p;
				TLAB_NEXT = TLAB_START;
				TLAB_REAL_END = TLAB_START + alloc_size;
				TLAB_TEMP_END = TLAB_START + MIN (SGEN_SCAN_START_SIZE, alloc_size);

				if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
					memset (TLAB_START, 0, alloc_size);
				}

				/* Allocate from the TLAB */
				p = (void*)TLAB_NEXT;
				TLAB_NEXT += size;
				sgen_set_nursery_scan_start ((char*)p);
			}
		} else {
			/* Reached tlab_temp_end */

			/* record the scan start so we can find pinned objects more easily */
			sgen_set_nursery_scan_start ((char*)p);
			/* we just bump tlab_temp_end as well */
			TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
			DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", TLAB_NEXT, TLAB_TEMP_END));
		}
	}

	if (G_LIKELY (p)) {
		DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
		binary_protocol_alloc (p, vtable, size);
		if (G_UNLIKELY (MONO_GC_MAJOR_OBJ_ALLOC_LARGE_ENABLED () || MONO_GC_NURSERY_OBJ_ALLOC_ENABLED ())) {
			if (size > SGEN_MAX_SMALL_OBJ_SIZE)
				MONO_GC_MAJOR_OBJ_ALLOC_LARGE ((mword)p, size, vtable->klass->name_space, vtable->klass->name);
			else
				MONO_GC_NURSERY_OBJ_ALLOC ((mword)p, size, vtable->klass->name_space, vtable->klass->name);
		}
		mono_atomic_store_seq (p, vtable);
	}

	return p;
}
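/*
 * A sketch of why the allocator calls sgen_set_nursery_scan_start() roughly every
 * SGEN_SCAN_START_SIZE bytes, as the header comment above describes.  The array,
 * the object_size() helper and find_object_containing() below are hypothetical and
 * simplified (the real GC also handles objects that begin in an earlier chunk);
 * they only illustrate how recorded "scan starts" bound the walk needed to find
 * the object covering an ambiguous, possibly pinned, pointer from a thread stack.
 */
#include <stddef.h>

#define SCAN_START_SIZE (4096)                 /* stand-in for SGEN_SCAN_START_SIZE */
#define NURSERY_SIZE    (4 * 1024 * 1024)

static char *nursery_start;                    /* base address of the nursery */
static char *scan_starts[NURSERY_SIZE / SCAN_START_SIZE];

/* Called by the allocator whenever an object begins a new scan-start chunk. */
static void
set_scan_start (char *obj)
{
	size_t idx = (size_t)(obj - nursery_start) / SCAN_START_SIZE;
	if (!scan_starts[idx])
		scan_starts[idx] = obj;
}

/* Hypothetical: in the real GC the size comes from the object's vtable/descriptor.
 * Here we pretend the first word of the object stores its aligned size. */
static size_t
object_size (char *obj)
{
	return *(size_t *)obj;
}

/*
 * Given an ambiguous pointer, walk forward from the nearest recorded scan start
 * until we reach the object that covers it.  Without scan starts we would have
 * to walk the nursery from its beginning for every candidate pointer.
 */
static char *
find_object_containing (char *addr)
{
	size_t idx = (size_t)(addr - nursery_start) / SCAN_START_SIZE;
	char *obj = scan_starts[idx];

	while (obj && obj <= addr) {
		char *end = obj + object_size (obj);
		if (addr < end)
			return obj;             /* 'addr' points into this object: pin it */
		obj = end;
	}
	return NULL;                            /* no covering object found in this chunk */
}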
/*
 * Provide a variant that takes just the vtable for small fixed-size objects.
 * The aligned size is already computed and stored in vt->gc_descr.
 * Note: every SGEN_SCAN_START_SIZE or so we are given the chance to do some special
 * processing. We can keep track of where objects start, for example,
 * so when we scan the thread stacks for pinned objects, we can start
 * a search for the pinned object in SGEN_SCAN_START_SIZE chunks.
 */
GCObject*
sgen_alloc_obj_nolock (GCVTable vtable, size_t size)
{
	/* FIXME: handle OOM */
	void **p;
	char *new_next;
	size_t real_size = size;
	TLAB_ACCESS_INIT;
	CANARIFY_SIZE(size);

	HEAVY_STAT (++stat_objects_alloced);
	if (real_size <= SGEN_MAX_SMALL_OBJ_SIZE)
		HEAVY_STAT (stat_bytes_alloced += size);
	else
		HEAVY_STAT (stat_bytes_alloced_los += size);

	size = ALIGN_UP (size);

	SGEN_ASSERT (6, sgen_vtable_get_descriptor (vtable), "VTable without descriptor");

	if (G_UNLIKELY (sgen_has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = mono_atomic_inc_i32 (&alloc_count);

		if (sgen_collect_before_allocs) {
			if (((current_alloc % sgen_collect_before_allocs) == 0) && sgen_nursery_section) {
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered", TRUE, TRUE);
				if (!sgen_degraded_mode && sgen_can_alloc_size (size) && real_size <= SGEN_MAX_SMALL_OBJ_SIZE) {
					// FIXME:
					g_assert_not_reached ();
				}
			}
		} else if (sgen_verify_before_allocs) {
			if ((current_alloc % sgen_verify_before_allocs) == 0)
				sgen_check_whole_heap_stw ();
		}
	}

	/*
	 * We must already have the lock here instead of after the
	 * fast path because we might be interrupted in the fast path
	 * (after confirming that new_next < TLAB_TEMP_END) by the GC,
	 * and we'll end up allocating an object in a fragment which
	 * no longer belongs to us.
	 *
	 * The managed allocator does not do this, but it's treated
	 * specially by the world-stopping code.
	 */

	if (real_size > SGEN_MAX_SMALL_OBJ_SIZE) {
		p = (void **)sgen_los_alloc_large_inner (vtable, ALIGN_UP (real_size));
	} else {
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;
		TLAB_NEXT = new_next;

		if (G_LIKELY (new_next < TLAB_TEMP_END)) {
			/* Fast path */

			CANARIFY_ALLOC(p,real_size);
			SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
			sgen_binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
			g_assert (*p == NULL);
			mono_atomic_store_seq (p, vtable);

			return (GCObject*)p;
		}

		/* Slow path */

		/* there are two cases: the object is too big or we ran out of space in the TLAB */
		/* we also reach here when the thread does its first allocation after a minor
		 * collection, since the tlab_ variables are initialized to NULL.
		 * there can be another case (from ORP), if we cooperate with the runtime a bit:
		 * objects that need finalizers can have the high bit set in their size
		 * so the above check fails and we can readily add the object to the queue.
		 * This avoids taking the GC lock again when registering, but this is moot when
		 * doing thread-local allocation, so it may not be a good idea.
		 */
		if (TLAB_NEXT >= TLAB_REAL_END) {
			int available_in_tlab;
			/*
			 * Ran out of space in the TLAB. When this happens, some amount of space
			 * remains in the TLAB, but not enough to satisfy the current allocation
			 * request. Currently, we retire the TLAB in all cases; later we could
			 * keep it if the remaining space is above a threshold, and satisfy the
			 * allocation directly from the nursery.
			 */
			TLAB_NEXT -= size;
			/* when running in degraded mode, we continue allocating that way
			 * for a while, to decrease the number of useless nursery collections.
			 */
			if (sgen_degraded_mode && sgen_degraded_mode < sgen_nursery_size)
				return alloc_degraded (vtable, size, FALSE);

			available_in_tlab = (int)(TLAB_REAL_END - TLAB_NEXT); // We'll never have TLABs > 2 GB
			if (size > sgen_tlab_size || available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
				/* Allocate directly from the nursery */
				p = (void **)sgen_nursery_alloc (size);
				if (!p) {
					/*
					 * We couldn't allocate from the nursery, so we try
					 * collecting. Even after the collection, we might
					 * still not have enough memory to allocate the
					 * object. The reason will most likely be that we've
					 * run out of memory, but there is the theoretical
					 * possibility that other threads might have consumed
					 * the freed up memory ahead of us.
					 *
					 * What we do in this case is allocate degraded, i.e.,
					 * from the major heap.
					 *
					 * Ideally we'd like to detect the case of other
					 * threads allocating ahead of us and loop (if we
					 * always loop we will loop endlessly in the case of
					 * OOM).
					 */
					sgen_ensure_free_space (real_size, GENERATION_NURSERY);
					if (!sgen_degraded_mode)
						p = (void **)sgen_nursery_alloc (size);
				}
				if (!p)
					return alloc_degraded (vtable, size, TRUE);

				zero_tlab_if_necessary (p, size);
			} else {
				size_t alloc_size = 0;
				if (TLAB_START)
					SGEN_LOG (3, "Retire TLAB: %p-%p [%ld]", TLAB_START, TLAB_REAL_END, (long)(TLAB_REAL_END - TLAB_NEXT - size));
				sgen_nursery_retire_region (p, available_in_tlab);

				p = (void **)sgen_nursery_alloc_range (sgen_tlab_size, size, &alloc_size);
				if (!p) {
					/* See comment above in the similar case. */
					sgen_ensure_free_space (sgen_tlab_size, GENERATION_NURSERY);
					if (!sgen_degraded_mode)
						p = (void **)sgen_nursery_alloc_range (sgen_tlab_size, size, &alloc_size);
				}
				if (!p)
					return alloc_degraded (vtable, size, TRUE);

				/* Allocate a new TLAB from the current nursery fragment */
				TLAB_START = (char*)p;
				TLAB_NEXT = TLAB_START;
				TLAB_REAL_END = TLAB_START + alloc_size;
				TLAB_TEMP_END = TLAB_START + MIN (SGEN_SCAN_START_SIZE, alloc_size);

				zero_tlab_if_necessary (TLAB_START, alloc_size);

				/* Allocate from the TLAB */
				p = (void **)TLAB_NEXT;
				TLAB_NEXT += size;
				sgen_set_nursery_scan_start ((char*)p);
			}
		} else {
			/* Reached tlab_temp_end */

			/* record the scan start so we can find pinned objects more easily */
			sgen_set_nursery_scan_start ((char*)p);
			/* we just bump tlab_temp_end as well */
			TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
			SGEN_LOG (5, "Expanding local alloc: %p-%p", TLAB_NEXT, TLAB_TEMP_END);
		}
		CANARIFY_ALLOC(p,real_size);
	}

	if (G_LIKELY (p)) {
		SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
		sgen_binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
		mono_atomic_store_seq (p, vtable);
	}

	return (GCObject*)p;
}
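/*
 * A condensed sketch of the slow-path fallback order that sgen_alloc_obj_nolock()
 * above follows once the TLAB is exhausted.  The helper declarations below
 * (nursery_alloc, ensure_free_space, major_alloc_degraded, degraded_mode) are
 * hypothetical stand-ins for the sgen_* calls, not real SGen API; the point is
 * the ordering: try the nursery, collect once, retry unless the collection left
 * us in degraded mode, and finally fall back to a degraded allocation from the
 * major heap rather than looping (which would never terminate under real OOM).
 */
#include <stddef.h>
#include <stdbool.h>

extern void *nursery_alloc (size_t size);                       /* ~ sgen_nursery_alloc */
extern void  ensure_free_space (size_t size);                   /* ~ sgen_ensure_free_space; may run a minor collection */
extern void *major_alloc_degraded (void *vtable, size_t size);  /* ~ alloc_degraded */
extern bool  degraded_mode;                                     /* ~ sgen_degraded_mode */

static void *
slow_path_alloc (void *vtable, size_t size)
{
	void *p = nursery_alloc (size);
	if (!p) {
		/* Collect and retry exactly once.  Another thread may still race us
		 * for the freed space; that rare case is accepted and we degrade. */
		ensure_free_space (size);
		if (!degraded_mode)
			p = nursery_alloc (size);
	}
	if (!p)
		p = major_alloc_degraded (vtable, size);        /* allocate from the major heap */
	return p;
}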