static bdescr *
allocNursery (bdescr *tail, W_ blocks)
{
    bdescr *bd = NULL;
    W_ i, n;

    // We allocate the nursery as a single contiguous block and then
    // divide it into single blocks manually.  This way we guarantee
    // that the nursery blocks are adjacent, so that the processor's
    // automatic prefetching works across nursery blocks.  This is a
    // tiny optimisation (~0.5%), but it's free.

    while (blocks > 0) {
        n = stg_min(BLOCKS_PER_MBLOCK, blocks);
        // allocLargeChunk will prefer large chunks, but will pick up
        // small chunks if there are any available.  We must allow
        // single blocks here to avoid fragmentation (#7257)
        bd = allocLargeChunk(1, n);
        n = bd->blocks;
        blocks -= n;

        for (i = 0; i < n; i++) {
            initBdescr(&bd[i], g0, g0);

            bd[i].blocks = 1;
            bd[i].flags  = 0;

            if (i > 0) {
                bd[i].u.back = &bd[i-1];
            } else {
                bd[i].u.back = NULL;
            }

            if (i+1 < n) {
                bd[i].link = &bd[i+1];
            } else {
                bd[i].link = tail;
                if (tail != NULL) {
                    tail->u.back = &bd[i];
                }
            }

            bd[i].free = bd[i].start;
        }

        tail = &bd[0];
    }

    return &bd[0];
}
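/* The loop above leaves every nursery block with blocks == 1, an empty
   allocation pointer, and link/back pointers that mirror each other, with
   the previously built chain (tail) appended at the end.  The following is
   a sketch only, not part of the RTS: a debug-style walk that re-checks
   those invariants on a chain built by allocNursery() starting from
   tail == NULL.  It assumes the usual bdescr fields from the block
   allocator headers; the helper name is hypothetical. */
static void
checkNurseryChain (bdescr *bd)
{
    bdescr *prev = NULL;
    for (; bd != NULL; prev = bd, bd = bd->link) {
        ASSERT(bd->blocks == 1);          // split into single blocks
        ASSERT(bd->free == bd->start);    // nothing allocated yet
        ASSERT(bd->u.back == prev);       // back pointer mirrors the link
    }
}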
StgPtr
alloc_todo_block (gen_workspace *ws, nat size)
{
    bdescr *bd/*, *hd, *tl */;

    // Grab a part block if we have one, and it has enough room
    bd = ws->part_list;
    if (bd != NULL &&
        bd->start + bd->blocks * BLOCK_SIZE_W - bd->free > (int)size)
    {
        ws->part_list = bd->link;
        ws->n_part_blocks -= bd->blocks;
    }
    else
    {
        // blocks in to-space get the BF_EVACUATED flag.

        // allocBlocks_sync(16, &hd, &tl,
        //                  ws->step->gen_no, ws->step, BF_EVACUATED);
        //
        // tl->link = ws->part_list;
        // ws->part_list = hd->link;
        // ws->n_part_blocks += 15;
        //
        // bd = hd;

        if (size > BLOCK_SIZE_W) {
            bd = allocGroup_sync((W_)BLOCK_ROUND_UP(size*sizeof(W_))
                                 / BLOCK_SIZE);
        } else {
            bd = allocBlock_sync();
        }
        initBdescr(bd, ws->gen, ws->gen->to);
        bd->flags = BF_EVACUATED;
        bd->u.scan = bd->free = bd->start;
    }

    bd->link = NULL;

    ws->todo_bd = bd;
    ws->todo_free = bd->free;
    ws->todo_lim  = stg_min(bd->start + bd->blocks * BLOCK_SIZE_W,
                            bd->free + stg_max(WORK_UNIT_WORDS,size));

    debugTrace(DEBUG_gc, "alloc new todo block %p for gen %d",
               bd->free, ws->gen->no);

    return ws->todo_free;
}
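/* The part-list test above computes the free room left in a block group
   inline.  Sketch only, not part of the RTS: the same check factored into
   a small helper (hypothetical name), assuming the usual bdescr fields. */
static W_
wordsRemaining (bdescr *bd)
{
    // free words left between the allocation pointer and the end of the
    // (possibly multi-block) group
    return bd->start + bd->blocks * BLOCK_SIZE_W - bd->free;
}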
StgPtr
alloc_todo_block (gen_workspace *ws, uint32_t size)
{
    bdescr *bd/*, *hd, *tl */;

    // Grab a part block if we have one, and it has enough room
    bd = ws->part_list;
    if (bd != NULL &&
        bd->start + bd->blocks * BLOCK_SIZE_W - bd->free > (int)size)
    {
        ws->part_list = bd->link;
        ws->n_part_blocks -= bd->blocks;
        ws->n_part_words -= bd->free - bd->start;
    }
    else
    {
        if (size > BLOCK_SIZE_W) {
            bd = allocGroup_sync((W_)BLOCK_ROUND_UP(size*sizeof(W_))
                                 / BLOCK_SIZE);
        } else {
            if (gct->free_blocks) {
                bd = gct->free_blocks;
                gct->free_blocks = bd->link;
            } else {
                allocBlocks_sync(16, &bd);
                gct->free_blocks = bd->link;
            }
        }
        // blocks in to-space get the BF_EVACUATED flag.
        bd->flags = BF_EVACUATED;
        bd->u.scan = bd->start;
        initBdescr(bd, ws->gen, ws->gen->to);
    }

    bd->link = NULL;

    ws->todo_bd = bd;
    ws->todo_free = bd->free;
    ws->todo_lim  = stg_min(bd->start + bd->blocks * BLOCK_SIZE_W,
                            bd->free + stg_max(WORK_UNIT_WORDS,size));
                     // See Note [big objects]

    debugTrace(DEBUG_gc, "alloc new todo block %p for gen %d",
               bd->free, ws->gen->no);

    return ws->todo_free;
}
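/* Two things in this newer version are worth illustrating.  First, small
   todo blocks come from a per-GC-thread cache (gct->free_blocks) that is
   refilled in batches via allocBlocks_sync(16, ...), so the block
   allocator's lock is taken roughly once per batch rather than once per
   block.  Second, todo_lim is meant to hand the scavenger a bounded unit
   of work: at least WORK_UNIT_WORDS (or the whole object, if larger), but
   never past the end of the block group.  Sketch only, not part of the
   RTS: the limit computation in isolation, with a hypothetical helper
   name. */
static StgPtr
todoLimit (bdescr *bd, uint32_t size)
{
    StgPtr end = bd->start + bd->blocks * BLOCK_SIZE_W;
    // hand out at least one work unit, but never beyond the group
    return stg_min(end, bd->free + stg_max(WORK_UNIT_WORDS, size));
}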
STATIC_INLINE void
evacuate_large(StgPtr p)
{
    bdescr *bd;
    generation *gen, *new_gen;
    nat gen_no, new_gen_no;
    gen_workspace *ws;

    bd = Bdescr(p);
    gen = bd->gen;
    gen_no = bd->gen_no;
    ACQUIRE_SPIN_LOCK(&gen->sync);

    // already evacuated?
    if (bd->flags & BF_EVACUATED) {
        /* Don't forget to set the gct->failed_to_evac flag if we didn't get
         * the desired destination (see comments in evacuate()).
         */
        if (gen_no < gct->evac_gen_no) {
            gct->failed_to_evac = rtsTrue;
            TICK_GC_FAILED_PROMOTION();
        }
        RELEASE_SPIN_LOCK(&gen->sync);
        return;
    }

    // remove from large_object list
    if (bd->u.back) {
        bd->u.back->link = bd->link;
    } else { // first object in the list
        gen->large_objects = bd->link;
    }
    if (bd->link) {
        bd->link->u.back = bd->u.back;
    }

    /* link it on to the evacuated large object list of the destination gen
     */
    new_gen_no = bd->dest_no;

    if (new_gen_no < gct->evac_gen_no) {
        if (gct->eager_promotion) {
            new_gen_no = gct->evac_gen_no;
        } else {
            gct->failed_to_evac = rtsTrue;
        }
    }

    ws = &gct->gens[new_gen_no];
    new_gen = &generations[new_gen_no];

    bd->flags |= BF_EVACUATED;
    initBdescr(bd, new_gen, new_gen->to);

    // If this is a block of pinned objects, we don't have to scan
    // these objects, because they aren't allowed to contain any
    // pointers.  For these blocks, we skip the scavenge stage and put
    // them straight on the scavenged_large_objects list.
    if (bd->flags & BF_PINNED) {
        ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS);

        if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync); }
        dbl_link_onto(bd, &new_gen->scavenged_large_objects);
        new_gen->n_scavenged_large_blocks += bd->blocks;
        if (new_gen != gen) { RELEASE_SPIN_LOCK(&new_gen->sync); }
    } else {
        bd->link = ws->todo_large_objects;
        ws->todo_large_objects = bd;
    }

    RELEASE_SPIN_LOCK(&gen->sync);
}
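/* Large objects live on doubly-linked lists of block descriptors: the code
   above unlinks bd by hand (patching u.back and link around it) and then
   relies on dbl_link_onto to push it onto the destination list.  Sketch
   only: a version of dbl_link_onto consistent with how it is used here,
   pushing a block onto the front of the list headed by *list.  The real
   definition lives in the storage-manager headers. */
static void
dbl_link_onto_sketch (bdescr *bd, bdescr **list)
{
    bd->link = *list;             // new front element points at old front
    bd->u.back = NULL;            // nothing precedes the front element
    if (*list) {
        (*list)->u.back = bd;     // old front now points back at bd
    }
    *list = bd;
}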
StgPtr
allocatePinned (Capability *cap, W_ n)
{
    StgPtr p;
    bdescr *bd;

    // If the request is for a large object, then allocate()
    // will give us a pinned object anyway.
    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
        p = allocate(cap, n);
        Bdescr(p)->flags |= BF_PINNED;
        return p;
    }

    TICK_ALLOC_HEAP_NOCTR(WDS(n));
    CCS_ALLOC(cap->r.rCCCS,n);
    if (cap->r.rCurrentTSO != NULL) {
        // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
        ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
                     (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
                      - n*sizeof(W_)));
    }

    bd = cap->pinned_object_block;

    // If we don't have a block of pinned objects yet, or the current
    // one isn't large enough to hold the new object, get a new one.
    if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {

        // stash the old block on cap->pinned_object_blocks.  On the
        // next GC cycle these objects will be moved to
        // g0->large_objects.
        if (bd != NULL) {
            // add it to the allocation stats when the block is full
            finishedNurseryBlock(cap, bd);
            dbl_link_onto(bd, &cap->pinned_object_blocks);
        }

        // We need to find another block.  We could just allocate one,
        // but that means taking a global lock and we really want to
        // avoid that (benchmarks that allocate a lot of pinned
        // objects scale really badly if we do this).
        //
        // So first, we try taking the next block from the nursery, in
        // the same way as allocate(), but note that we can only take
        // an *empty* block, because we're about to mark it as
        // BF_PINNED | BF_LARGE.
        bd = cap->r.rCurrentNursery->link;

        if (bd == NULL) { // must be empty!
            // The nursery is empty, or the next block is non-empty:
            // allocate a fresh block (we can't fail here).

            // XXX in the case when the next nursery block is
            // non-empty we aren't exerting any pressure to GC soon,
            // so if this case ever happens then we could in theory
            // keep allocating for ever without calling the GC. We
            // can't bump g0->n_new_large_words because that will be
            // counted towards allocation, and we're already counting
            // our pinned objects as allocation in
            // collect_pinned_object_blocks in the GC.
            ACQUIRE_SM_LOCK;
            bd = allocBlock();
            RELEASE_SM_LOCK;
            initBdescr(bd, g0, g0);
        } else {
            newNurseryBlock(bd);
            // we have a block in the nursery: steal it
            cap->r.rCurrentNursery->link = bd->link;
            if (bd->link != NULL) {
                bd->link->u.back = cap->r.rCurrentNursery;
            }
            cap->r.rNursery->n_blocks -= bd->blocks;
        }

        cap->pinned_object_block = bd;
        bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;

        // The pinned_object_block remains attached to the capability
        // until it is full, even if a GC occurs.  We want this
        // behaviour because otherwise the unallocated portion of the
        // block would be forever slop, and under certain workloads
        // (allocating a few ByteStrings per GC) we accumulate a lot
        // of slop.
        //
        // So, the pinned_object_block is initially marked
        // BF_EVACUATED so the GC won't touch it.  When it is full,
        // we place it on the large_objects list, and at the start of
        // the next GC the BF_EVACUATED flag will be cleared, and the
        // block will be promoted as usual (if anything in it is
        // live).
    }

    p = bd->free;
    bd->free += n;
    return p;
}
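/* allocatePinned() hands back raw words inside a BF_PINNED block; it is the
   caller's job to lay a closure over them.  Sketch only, not RTS code:
   roughly what a newPinnedByteArray#-style caller does with the result,
   assuming the usual closure macros (sizeofW, ROUNDUP_BYTES_TO_WDS,
   SET_HDR) and using the capability's current cost-centre stack for the
   header.  The helper name is hypothetical. */
static StgPtr
allocPinnedByteArray (Capability *cap, W_ bytes)
{
    // size of the ARR_WORDS closure: header plus payload, rounded up to
    // whole words
    W_ words = sizeofW(StgArrBytes) + ROUNDUP_BYTES_TO_WDS(bytes);
    StgPtr p = allocatePinned(cap, words);

    SET_HDR((StgClosure *)p, &stg_ARR_WORDS_info, cap->r.rCCCS);
    ((StgArrBytes *)p)->bytes = bytes;
    return p;
}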
StgPtr
allocate (Capability *cap, W_ n)
{
    bdescr *bd;
    StgPtr p;

    TICK_ALLOC_HEAP_NOCTR(WDS(n));
    CCS_ALLOC(cap->r.rCCCS,n);
    if (cap->r.rCurrentTSO != NULL) {
        // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_)
        ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
                     (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
                      - n*sizeof(W_)));
    }

    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
        // The largest number of words such that
        // the computation of req_blocks will not overflow.
        W_ max_words = (HS_WORD_MAX & ~(BLOCK_SIZE-1)) / sizeof(W_);
        W_ req_blocks;

        if (n > max_words)
            req_blocks = HS_WORD_MAX; // signal overflow below
        else
            req_blocks = (W_)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;

        // Attempting to allocate an object larger than maxHeapSize
        // should definitely be disallowed.  (bug #1791)
        if ((RtsFlags.GcFlags.maxHeapSize > 0 &&
             req_blocks >= RtsFlags.GcFlags.maxHeapSize) ||
            req_blocks >= HS_INT32_MAX)   // avoid overflow when
                                          // calling allocGroup() below
        {
            heapOverflow();
            // heapOverflow() doesn't exit (see #2592), but we aren't
            // in a position to do a clean shutdown here: we
            // either have to allocate the memory or exit now.
            // Allocating the memory would be bad, because the user
            // has requested that we not exceed maxHeapSize, so we
            // just exit.
            stg_exit(EXIT_HEAPOVERFLOW);
        }

        ACQUIRE_SM_LOCK;
        bd = allocGroup(req_blocks);
        dbl_link_onto(bd, &g0->large_objects);
        g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
        g0->n_new_large_words += n;
        RELEASE_SM_LOCK;
        initBdescr(bd, g0, g0);
        bd->flags = BF_LARGE;
        bd->free = bd->start + n;
        cap->total_allocated += n;
        return bd->start;
    }

    /* small allocation (<LARGE_OBJECT_THRESHOLD) */

    bd = cap->r.rCurrentAlloc;
    if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {

        if (bd) finishedNurseryBlock(cap,bd);

        // The CurrentAlloc block is full, we need to find another
        // one.  First, we try taking the next block from the
        // nursery:
        bd = cap->r.rCurrentNursery->link;

        if (bd == NULL) {
            // The nursery is empty: allocate a fresh block (we can't
            // fail here).
            ACQUIRE_SM_LOCK;
            bd = allocBlock();
            cap->r.rNursery->n_blocks++;
            RELEASE_SM_LOCK;
            initBdescr(bd, g0, g0);
            bd->flags = 0;
            // If we had to allocate a new block, then we'll GC
            // pretty quickly now, because MAYBE_GC() will
            // notice that CurrentNursery->link is NULL.
        } else {
            newNurseryBlock(bd);
            // we have a block in the nursery: take it and put
            // it at the *front* of the nursery list, and use it
            // to allocate() from.
            //
            // Previously the nursery looked like this:
            //
            //           CurrentNursery
            //                  /
            //                +-+    +-+
            // nursery -> ... |A| -> |B| -> ...
            //                +-+    +-+
            //
            // After doing this, it looks like this:
            //
            //                      CurrentNursery
            //                             /
            //            +-+           +-+
            // nursery -> |B| -> ... -> |A| -> ...
            //            +-+           +-+
            //             |
            //             CurrentAlloc
            //
            // The point is to get the block out of the way of the
            // advancing CurrentNursery pointer, while keeping it
            // on the nursery list so we don't lose track of it.
            cap->r.rCurrentNursery->link = bd->link;
            if (bd->link != NULL) {
                bd->link->u.back = cap->r.rCurrentNursery;
            }
        }
        dbl_link_onto(bd, &cap->r.rNursery->blocks);
        cap->r.rCurrentAlloc = bd;
        IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
    }
    p = bd->free;
    bd->free += n;

    IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa));
    return p;
}
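/* Sketch only, not RTS code: the req_blocks arithmetic above with concrete
   numbers.  BLOCK_ROUND_UP rounds a byte count up to a block boundary;
   assuming the default 4096-byte block size and 8-byte words, a request
   for 1000 words (8000 bytes) rounds up to 8192 bytes, i.e. two blocks.
   bd->free is then set to bd->start + n, so the unused tail of the second
   block is slop.  The block size and word size below are assumptions, not
   read from the RTS configuration. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t block_size = 4096, word_size = 8;   // assumed defaults
    uint64_t n = 1000;                           // words requested
    uint64_t bytes = n * word_size;              // 8000 bytes
    uint64_t req_blocks = (bytes + block_size - 1) / block_size;
    assert(req_blocks == 2);                     // rounded up to two blocks
    return 0;
}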
static StgCompactNFDataBlock *
compactAllocateBlockInternal(Capability            *cap,
                             StgWord                aligned_size,
                             StgCompactNFDataBlock *first,
                             AllocateOp             operation)
{
    StgCompactNFDataBlock *self;
    bdescr *block, *head;
    uint32_t n_blocks;
    generation *g;

    n_blocks = aligned_size / BLOCK_SIZE;

    // Attempting to allocate an object larger than maxHeapSize
    // should definitely be disallowed.  (bug #1791)
    if ((RtsFlags.GcFlags.maxHeapSize > 0 &&
         n_blocks >= RtsFlags.GcFlags.maxHeapSize) ||
        n_blocks >= HS_INT32_MAX)   // avoid overflow when
                                    // calling allocGroup() below
    {
        reportHeapOverflow();
        // reportHeapOverflow() doesn't exit (see #2592), but we aren't
        // in a position to do a clean shutdown here: we
        // either have to allocate the memory or exit now.
        // Allocating the memory would be bad, because the user
        // has requested that we not exceed maxHeapSize, so we
        // just exit.
        stg_exit(EXIT_HEAPOVERFLOW);
    }

    // It is imperative that first is the first block in the compact
    // (or NULL if the compact does not exist yet)
    // because the evacuate code does not update the generation of
    // blocks other than the first (so we would get the statistics
    // wrong and crash in Sanity)
    if (first != NULL) {
        block = Bdescr((P_)first);
        g = block->gen;
    } else {
        g = g0;
    }

    ACQUIRE_SM_LOCK;
    block = allocGroup(n_blocks);
    switch (operation) {
    case ALLOCATE_NEW:
        ASSERT(first == NULL);
        ASSERT(g == g0);
        dbl_link_onto(block, &g0->compact_objects);
        g->n_compact_blocks += block->blocks;
        g->n_new_large_words += aligned_size / sizeof(StgWord);
        break;

    case ALLOCATE_IMPORT_NEW:
        dbl_link_onto(block, &g0->compact_blocks_in_import);
        /* fallthrough */
    case ALLOCATE_IMPORT_APPEND:
        ASSERT(first == NULL);
        ASSERT(g == g0);
        g->n_compact_blocks_in_import += block->blocks;
        g->n_new_large_words += aligned_size / sizeof(StgWord);
        break;

    case ALLOCATE_APPEND:
        g->n_compact_blocks += block->blocks;
        if (g == g0)
            g->n_new_large_words += aligned_size / sizeof(StgWord);
        break;

    default:
#if defined(DEBUG)
        ASSERT(!"code should not be reached");
#else
        RTS_UNREACHABLE;
#endif
    }
    RELEASE_SM_LOCK;

    cap->total_allocated += aligned_size / sizeof(StgWord);

    self = (StgCompactNFDataBlock*) block->start;
    self->self = self;
    self->next = NULL;

    head = block;
    initBdescr(head, g, g);
    head->flags = BF_COMPACT;
    for (block = head + 1, n_blocks--; n_blocks > 0; block++, n_blocks--) {
        block->link = head;
        block->blocks = 0;
        block->flags = BF_COMPACT;
    }

    return self;
}
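/* The loop above gives every block after the head blocks == 0 and a link
   pointing back at the head block.  Sketch only, not RTS code: how a
   pointer anywhere inside a compact can be mapped back to its first block
   using that convention (the RTS has a helper along these lines; the name
   used here is illustrative). */
static StgCompactNFDataBlock *
compactBlockOf (StgClosure *p)
{
    bdescr *bd = Bdescr((StgPtr)p);
    ASSERT(bd->flags & BF_COMPACT);
    if (bd->blocks == 0) {
        bd = bd->link;        // interior block: follow the link to the head
    }
    return (StgCompactNFDataBlock *)bd->start;
}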
/* ----------------------------------------------------------------------------
   Evacuate an object inside a CompactNFData

   These are treated in a similar way to large objects.  We remove the block
   from the compact_objects list of the generation it is on, and link it onto
   the live_compact_objects list of the destination generation.

   It is assumed that objects in the struct live in the same generation
   as the struct itself all the time.
   ------------------------------------------------------------------------- */
STATIC_INLINE void
evacuate_compact (StgPtr p)
{
    StgCompactNFData *str;
    bdescr *bd;
    generation *gen, *new_gen;
    uint32_t gen_no, new_gen_no;

    // We need to find the Compact# corresponding to this pointer, because it
    // will give us the first block in the compact chain, which is the one
    // that gets linked onto the compact_objects list.
    str = objectGetCompact((StgClosure*)p);
    ASSERT(get_itbl((StgClosure*)str)->type == COMPACT_NFDATA);

    bd = Bdescr((StgPtr)str);
    gen_no = bd->gen_no;

    // already evacuated? (we're about to do the same check,
    // but we avoid taking the spin-lock)
    if (bd->flags & BF_EVACUATED) {
        /* Don't forget to set the gct->failed_to_evac flag if we didn't get
         * the desired destination (see comments in evacuate()).
         */
        if (gen_no < gct->evac_gen_no) {
            gct->failed_to_evac = true;
            TICK_GC_FAILED_PROMOTION();
        }
        return;
    }

    gen = bd->gen;
    gen_no = bd->gen_no;
    ACQUIRE_SPIN_LOCK(&gen->sync);

    // already evacuated?
    if (bd->flags & BF_EVACUATED) {
        /* Don't forget to set the gct->failed_to_evac flag if we didn't get
         * the desired destination (see comments in evacuate()).
         */
        if (gen_no < gct->evac_gen_no) {
            gct->failed_to_evac = true;
            TICK_GC_FAILED_PROMOTION();
        }
        RELEASE_SPIN_LOCK(&gen->sync);
        return;
    }

    // remove from compact_objects list
    if (bd->u.back) {
        bd->u.back->link = bd->link;
    } else { // first object in the list
        gen->compact_objects = bd->link;
    }
    if (bd->link) {
        bd->link->u.back = bd->u.back;
    }

    /* link it on to the evacuated compact object list of the destination gen
     */
    new_gen_no = bd->dest_no;

    if (new_gen_no < gct->evac_gen_no) {
        if (gct->eager_promotion) {
            new_gen_no = gct->evac_gen_no;
        } else {
            gct->failed_to_evac = true;
        }
    }

    new_gen = &generations[new_gen_no];

    // Note: for speed we only update the generation of the first block here
    // This means that bdescr of subsequent blocks will think they are in
    // the wrong generation
    // (This should not be a problem because there is no code that checks
    // for that - the only code touching the generation of the block is
    // in the GC, and that should never see blocks other than the first)
    bd->flags |= BF_EVACUATED;
    initBdescr(bd, new_gen, new_gen->to);

    if (str->hash) {
        gen_workspace *ws = &gct->gens[new_gen_no];
        bd->link = ws->todo_large_objects;
        ws->todo_large_objects = bd;
    } else {
        if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync); }
        dbl_link_onto(bd, &new_gen->live_compact_objects);
        new_gen->n_live_compact_blocks += str->totalW / BLOCK_SIZE_W;
        if (new_gen != gen) { RELEASE_SPIN_LOCK(&new_gen->sync); }
    }

    RELEASE_SPIN_LOCK(&gen->sync);

    // Note: the object did not move in memory, because it lives
    // in pinned (BF_COMPACT) allocation, so we do not need to rewrite it
    // or muck with forwarding pointers
    // Also there is no tag to worry about on the struct (tags are used
    // for constructors and functions, but a struct is neither). There
    // might be a tag on the object pointer, but again we don't change
    // the pointer because we don't move the object so we don't need to
    // rewrite the tag.
}