// Allocate 'blocks' blocks for a nursery on the given NUMA node and chain
// them, one block per bdescr, in front of the existing list 'tail'.
// Returns the head of the resulting list.
//
// node   - NUMA node to allocate from (passed through to allocLargeChunkOnNode)
// tail   - existing block list to append after the newly allocated blocks;
//          may be NULL
// blocks - number of single blocks requested
//
// NOTE(review): if blocks == 0 on entry, bd stays NULL and the final
// `return &bd[0]` computes `NULL + 0` — harmless in practice but worth
// confirming callers never pass 0.
static bdescr *
allocNursery (uint32_t node, bdescr *tail, W_ blocks)
{
    bdescr *bd = NULL;
    W_ i, n;

    // We allocate the nursery as a single contiguous block and then
    // divide it into single blocks manually.  This way we guarantee
    // that the nursery blocks are adjacent, so that the processor's
    // automatic prefetching works across nursery blocks.  This is a
    // tiny optimisation (~0.5%), but it's free.
    while (blocks > 0) {
        // Never ask for more than one megablock's worth at a time.
        n = stg_min(BLOCKS_PER_MBLOCK, blocks);
        // allocLargeChunk will prefer large chunks, but will pick up
        // small chunks if there are any available.  We must allow
        // single blocks here to avoid fragmentation (#7257)
        bd = allocLargeChunkOnNode(node, 1, n);
        // The allocator may return fewer blocks than requested; use the
        // actual count.
        n = bd->blocks;
        blocks -= n;

        // Split the contiguous chunk into single-block bdescrs, doubly
        // linked via .link (forward) and .u.back (backward).
        for (i = 0; i < n; i++) {
            initBdescr(&bd[i], g0, g0);

            bd[i].blocks = 1;
            bd[i].flags = 0;

            // Backward link: first block of the chunk has no predecessor
            // (yet — it becomes the head of the whole list, see below).
            if (i > 0) {
                bd[i].u.back = &bd[i-1];
            } else {
                bd[i].u.back = NULL;
            }

            // Forward link: last block of the chunk is spliced onto the
            // previously built list ('tail'), whose head gets its back
            // pointer fixed up to point at us.
            if (i+1 < n) {
                bd[i].link = &bd[i+1];
            } else {
                bd[i].link = tail;
                if (tail != NULL) {
                    tail->u.back = &bd[i];
                }
            }

            // Fresh block: free pointer starts at the beginning.
            bd[i].free = bd[i].start;
        }

        // The chunk we just built becomes the tail for the next iteration,
        // so successive chunks are prepended.
        tail = &bd[0];
    }

    // Head of the most recently allocated chunk == head of the whole list.
    return &bd[0];
}
// Allocate approximately n blocks for this GC thread, on its own NUMA node,
// and carve them into a singly-linked list of single-block bdescrs.
//
// n  - number of blocks requested (the allocator may return fewer or more;
//      the actual count is returned)
// hd - out parameter: receives the head of the block list
//
// Returns the number of blocks actually obtained.
static uint32_t
allocBlocks_sync(uint32_t n, bdescr **hd)
{
    uint32_t node = capNoToNumaNode(gct->thread_index);

    ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);

    // NB. allocLargeChunk, rather than allocGroup(n), to allocate in a
    // fragmentation-friendly way.
    bdescr *chunk = allocLargeChunkOnNode(node, 1, n);
    n = chunk->blocks;

    // Split the contiguous group into unit blocks chained through .link;
    // the last block terminates the list.
    for (uint32_t i = 0; i < n; i++) {
        chunk[i].blocks = 1;
        chunk[i].free   = chunk[i].start;
        chunk[i].link   = (i + 1 < n) ? &chunk[i + 1] : NULL;
    }

    // We have to hold the lock until we've finished fiddling with the
    // metadata, otherwise the block allocator can get confused.
    RELEASE_SPIN_LOCK(&gc_alloc_block_sync);

    *hd = chunk;
    return n;
}