void storageAddCapabilities (uint32_t from, uint32_t to)
{
    uint32_t n, g, i, new_n_nurseries;

    if (RtsFlags.GcFlags.nurseryChunkSize == 0) {
        new_n_nurseries = to;
    } else {
        memcount total_alloc = to * RtsFlags.GcFlags.minAllocAreaSize;
        new_n_nurseries =
            stg_max(to, total_alloc / RtsFlags.GcFlags.nurseryChunkSize);
    }

    if (from > 0) {
        nurseries = stgReallocBytes(nurseries,
                                    new_n_nurseries * sizeof(struct nursery_),
                                    "storageAddCapabilities");
    } else {
        nurseries = stgMallocBytes(new_n_nurseries * sizeof(struct nursery_),
                                   "storageAddCapabilities");
    }

    // we've moved the nurseries, so we have to update the rNursery
    // pointers from the Capabilities.
    for (i = 0; i < to; i++) {
        capabilities[i]->r.rNursery = &nurseries[i];
    }

    /* The allocation area.  Policy: keep the allocation area
     * small to begin with, even if we have a large suggested heap
     * size.  Reason: we're going to do a major collection first, and we
     * don't want it to be a big one.  This vague idea is borne out by
     * rigorous experimental evidence.
     */
    allocNurseries(n_nurseries, new_n_nurseries);
    n_nurseries = new_n_nurseries;

    /*
     * Assign each of the new capabilities a nursery.  Remember to start from
     * next_nursery, because we may have already consumed some of the earlier
     * nurseries.
     */
    assignNurseriesToCapabilities(from, to);

    // allocate a block for each mut list
    for (n = from; n < to; n++) {
        for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
            capabilities[n]->mut_lists[g] =
                allocBlockOnNode(capNoToNumaNode(n));
        }
    }

#if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR) && (CC_SUPPORTS_TLS == 0)
    newThreadLocalKey(&gctKey);
#endif

    initGcThreads(from, to);
}
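//
// Allocate a block group of n blocks on the current GC thread's NUMA node.
// The block allocator is shared between GC threads, so the allocation is
// protected by the gc_alloc_block_sync spin lock.
//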
bdescr* allocGroup_sync(uint32_t n)
{
    bdescr *bd;
    uint32_t node = capNoToNumaNode(gct->thread_index);
    ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
    bd = allocGroupOnNode(node, n);
    RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
    return bd;
}
//
// Resize each of the nurseries to the specified size.
//
static void
resizeNurseriesEach (W_ blocks)
{
    uint32_t i, node;
    bdescr *bd;
    W_ nursery_blocks;
    nursery *nursery;

    for (i = 0; i < n_nurseries; i++) {
        nursery = &nurseries[i];
        nursery_blocks = nursery->n_blocks;
        if (nursery_blocks == blocks) continue;

        node = capNoToNumaNode(i);
        if (nursery_blocks < blocks) {
            debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
                       blocks);
            nursery->blocks = allocNursery(node, nursery->blocks,
                                           blocks - nursery_blocks);
        } else {
            bdescr *next_bd;

            debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
                       blocks);

            bd = nursery->blocks;
            while (nursery_blocks > blocks) {
                next_bd = bd->link;
                next_bd->u.back = NULL;
                nursery_blocks -= bd->blocks; // might be a large block
                freeGroup(bd);
                bd = next_bd;
            }
            nursery->blocks = bd;
            // might have gone just under, by freeing a large block, so make
            // up the difference.
            if (nursery_blocks < blocks) {
                nursery->blocks = allocNursery(node, nursery->blocks,
                                               blocks - nursery_blocks);
            }
        }
        nursery->n_blocks = blocks;
        ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
    }
}
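//
// Allocate the initial block list for each nursery in the range [from, to),
// sized by the nursery chunk size if one is set, otherwise by the minimum
// allocation area size.  Blocks are allocated on the NUMA node of the
// corresponding capability.
//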
static void allocNurseries (uint32_t from, uint32_t to)
{
    uint32_t i;
    memcount n_blocks;

    if (RtsFlags.GcFlags.nurseryChunkSize) {
        n_blocks = RtsFlags.GcFlags.nurseryChunkSize;
    } else {
        n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
    }

    for (i = from; i < to; i++) {
        nurseries[i].blocks = allocNursery(capNoToNumaNode(i), NULL, n_blocks);
        nurseries[i].n_blocks = n_blocks;
    }
}
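//
// Reset the per-NUMA-node nursery cursors and hand the nurseries back out
// to the capabilities.  In a DEBUG build, also sanity-check every nursery
// block and optionally scribble over its contents.
//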
void resetNurseries (void)
{
    uint32_t n;

    for (n = 0; n < n_numa_nodes; n++) {
        next_nursery[n] = n;
    }
    assignNurseriesToCapabilities(0, n_capabilities);

#ifdef DEBUG
    bdescr *bd;
    for (n = 0; n < n_nurseries; n++) {
        for (bd = nurseries[n].blocks; bd; bd = bd->link) {
            ASSERT(bd->gen_no == 0);
            ASSERT(bd->gen == g0);
            ASSERT(bd->node == capNoToNumaNode(n));
            IF_DEBUG(sanity, memset(bd->start, 0xaa, BLOCK_SIZE));
        }
    }
#endif
}
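//
// Allocate a chunk of up to n blocks on the current GC thread's NUMA node
// and split it into single-block groups.  Returns the number of blocks
// actually allocated and the head of the linked list in *hd.  Holds the
// gc_alloc_block_sync spin lock while the block metadata is set up.
//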
static uint32_t allocBlocks_sync(uint32_t n, bdescr **hd)
{
    bdescr *bd;
    uint32_t i;
    uint32_t node = capNoToNumaNode(gct->thread_index);
    ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
    bd = allocLargeChunkOnNode(node, 1, n);
    // NB. allocLargeChunk, rather than allocGroup(n), to allocate in a
    // fragmentation-friendly way.
    n = bd->blocks;
    for (i = 0; i < n; i++) {
        bd[i].blocks = 1;
        bd[i].link = &bd[i+1];
        bd[i].free = bd[i].start;
    }
    bd[n-1].link = NULL;
    // We have to hold the lock until we've finished fiddling with the
    // metadata, otherwise the block allocator can get confused.
    RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
    *hd = bd;
    return n;
}
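//
// Initialise a newly created Capability: the run queue, the spark pool and
// spark statistics (THREADED_RTS only), the mut_lists for each generation,
// the STM free lists, and the tracing/capset bookkeeping.
//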
static void initCapability (Capability *cap, uint32_t i)
{
    uint32_t g;

    cap->no = i;
    cap->node = capNoToNumaNode(i);
    cap->in_haskell = rtsFalse;
    cap->idle = 0;
    cap->disabled = rtsFalse;

    cap->run_queue_hd = END_TSO_QUEUE;
    cap->run_queue_tl = END_TSO_QUEUE;

#if defined(THREADED_RTS)
    initMutex(&cap->lock);
    cap->running_task = NULL; // indicates cap is free
    cap->spare_workers = NULL;
    cap->n_spare_workers = 0;
    cap->suspended_ccalls = NULL;
    cap->returning_tasks_hd = NULL;
    cap->returning_tasks_tl = NULL;
    cap->inbox = (Message*)END_TSO_QUEUE;
    cap->sparks = allocSparkPool();
    cap->spark_stats.created = 0;
    cap->spark_stats.dud = 0;
    cap->spark_stats.overflowed = 0;
    cap->spark_stats.converted = 0;
    cap->spark_stats.gcd = 0;
    cap->spark_stats.fizzled = 0;
#if !defined(mingw32_HOST_OS)
    cap->io_manager_control_wr_fd = -1;
#endif
#endif
    cap->total_allocated = 0;

    cap->f.stgEagerBlackholeInfo = (W_)&__stg_EAGER_BLACKHOLE_info;
    cap->f.stgGCEnter1 = (StgFunPtr)__stg_gc_enter_1;
    cap->f.stgGCFun = (StgFunPtr)__stg_gc_fun;

    cap->mut_lists = stgMallocBytes(sizeof(bdescr *) *
                                    RtsFlags.GcFlags.generations,
                                    "initCapability");
    cap->saved_mut_lists = stgMallocBytes(sizeof(bdescr *) *
                                          RtsFlags.GcFlags.generations,
                                          "initCapability");

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        cap->mut_lists[g] = NULL;
    }

    cap->weak_ptr_list_hd = NULL;
    cap->weak_ptr_list_tl = NULL;
    cap->free_tvar_watch_queues = END_STM_WATCH_QUEUE;
    cap->free_invariant_check_queues = END_INVARIANT_CHECK_QUEUE;
    cap->free_trec_chunks = END_STM_CHUNK_LIST;
    cap->free_trec_headers = NO_TREC;
    cap->transaction_tokens = 0;
    cap->context_switch = 0;
    cap->pinned_object_block = NULL;
    cap->pinned_object_blocks = NULL;

#ifdef PROFILING
    cap->r.rCCCS = CCS_SYSTEM;
#else
    cap->r.rCCCS = NULL;
#endif

    // cap->r.rCurrentTSO is charged for calls to allocate(), so we
    // don't want it set when not running a Haskell thread.
    cap->r.rCurrentTSO = NULL;

    traceCapCreate(cap);
    traceCapsetAssignCap(CAPSET_OSPROCESS_DEFAULT, i);
    traceCapsetAssignCap(CAPSET_CLOCKDOMAIN_DEFAULT, i);
#if defined(THREADED_RTS)
    traceSparkCounters(cap);
#endif
}