// scavenge only the marked areas of a MUT_ARR_PTRS
static StgPtr
scavenge_mut_arr_ptrs_marked (StgMutArrPtrs *a)
{
    lnat m;
    StgPtr p, q;
    rtsBool any_failed;

    any_failed = rtsFalse;
    for (m = 0; m < mutArrPtrsCards(a->ptrs); m++)
    {
        if (*mutArrPtrsCard(a,m) != 0) {
            p = (StgPtr)&a->payload[m << MUT_ARR_PTRS_CARD_BITS];
            q = stg_min(p + (1 << MUT_ARR_PTRS_CARD_BITS),
                        (StgPtr)&a->payload[a->ptrs]);
            for (; p < q; p++) {
                evacuate((StgClosure**)p);
            }
            if (gct->failed_to_evac) {
                any_failed = rtsTrue;
                gct->failed_to_evac = rtsFalse;
            } else {
                *mutArrPtrsCard(a,m) = 0;
            }
        }
    }

    gct->failed_to_evac = any_failed;
    return (StgPtr)a + mut_arr_ptrs_sizeW(a);
}
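// Illustration (not part of the RTS): a minimal, self-contained sketch of the
// card-table arithmetic the function above relies on.  CARD_BITS, the card
// array and the element count are assumed example values; the real versions
// are MUT_ARR_PTRS_CARD_BITS, mutArrPtrsCard() and mutArrPtrsCards().  One
// card covers (1 << CARD_BITS) array elements; a write dirties its card, and
// the scavenger only walks cards whose entry is non-zero, clearing each card
// once every element it covers has been evacuated without failure.

#include <stdio.h>
#include <string.h>

#define CARD_BITS 7                          /* assumed: 128 elements per card */
#define CARDS(n)  (((n) + (1 << CARD_BITS) - 1) >> CARD_BITS)

int main(void)
{
    size_t nelems = 1000;                    /* hypothetical array size */
    unsigned char cards[CARDS(1000)];
    memset(cards, 0, sizeof(cards));

    size_t written = 300;                    /* a write to element 300 ... */
    cards[written >> CARD_BITS] = 1;         /* ... dirties card 2 */

    for (size_t m = 0; m < CARDS(nelems); m++) {
        if (cards[m] != 0) {
            size_t lo = m << CARD_BITS;
            size_t hi = lo + (1 << CARD_BITS);
            if (hi > nelems) hi = nelems;    /* clamp, like the stg_min above */
            printf("card %zu dirty: scavenge elements [%zu,%zu)\n", m, lo, hi);
            cards[m] = 0;                    /* clean the card after scavenging */
        }
    }
    return 0;
}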
static bdescr *
allocNursery (bdescr *tail, W_ blocks)
{
    bdescr *bd = NULL;
    W_ i, n;

    // We allocate the nursery as a single contiguous block and then
    // divide it into single blocks manually.  This way we guarantee
    // that the nursery blocks are adjacent, so that the processor's
    // automatic prefetching works across nursery blocks.  This is a
    // tiny optimisation (~0.5%), but it's free.

    while (blocks > 0) {
        n = stg_min(BLOCKS_PER_MBLOCK, blocks);
        // allocLargeChunk will prefer large chunks, but will pick up
        // small chunks if there are any available.  We must allow
        // single blocks here to avoid fragmentation (#7257)
        bd = allocLargeChunk(1, n);
        n = bd->blocks;
        blocks -= n;

        for (i = 0; i < n; i++) {
            initBdescr(&bd[i], g0, g0);

            bd[i].blocks = 1;
            bd[i].flags  = 0;

            if (i > 0) {
                bd[i].u.back = &bd[i-1];
            } else {
                bd[i].u.back = NULL;
            }

            if (i+1 < n) {
                bd[i].link = &bd[i+1];
            } else {
                bd[i].link = tail;
                if (tail != NULL) {
                    tail->u.back = &bd[i];
                }
            }

            bd[i].free = bd[i].start;
        }

        tail = &bd[0];
    }

    return &bd[0];
}
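// Illustration (not part of the RTS): a minimal sketch of the list shape
// allocNursery builds.  "node" is a hypothetical stand-in for bdescr keeping
// only the link/back fields.  Each chunk of n blocks is threaded into a
// doubly-linked list; later chunks end up in front, and the previous head
// (the "tail" argument) is linked after the last block of the new chunk.

#include <stddef.h>
#include <assert.h>

struct node { struct node *link; struct node *back; };

// Thread an array of n nodes into a doubly-linked chain whose last node
// points at 'tail', mirroring the inner loop of allocNursery above.
static struct node *thread_chunk(struct node *chunk, size_t n, struct node *tail)
{
    for (size_t i = 0; i < n; i++) {
        chunk[i].back = (i > 0) ? &chunk[i-1] : NULL;
        if (i + 1 < n) {
            chunk[i].link = &chunk[i+1];
        } else {
            chunk[i].link = tail;
            if (tail != NULL) tail->back = &chunk[i];
        }
    }
    return &chunk[0];
}

int main(void)
{
    struct node a[4], b[4];
    struct node *head = thread_chunk(a, 4, NULL);   /* first chunk            */
    head = thread_chunk(b, 4, head);                /* second chunk, in front */
    assert(head == &b[0]);
    assert(b[3].link == &a[0] && a[0].back == &b[3]);
    return 0;
}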
StgPtr
alloc_todo_block (gen_workspace *ws, nat size)
{
    bdescr *bd/*, *hd, *tl */;

    // Grab a part block if we have one, and it has enough room
    bd = ws->part_list;
    if (bd != NULL &&
        bd->start + bd->blocks * BLOCK_SIZE_W - bd->free > (int)size)
    {
        ws->part_list = bd->link;
        ws->n_part_blocks -= bd->blocks;
    }
    else
    {
        // blocks in to-space get the BF_EVACUATED flag.

//        allocBlocks_sync(16, &hd, &tl,
//                         ws->step->gen_no, ws->step, BF_EVACUATED);
//
//        tl->link = ws->part_list;
//        ws->part_list = hd->link;
//        ws->n_part_blocks += 15;
//
//        bd = hd;

        if (size > BLOCK_SIZE_W) {
            bd = allocGroup_sync((W_)BLOCK_ROUND_UP(size*sizeof(W_))
                                 / BLOCK_SIZE);
        } else {
            bd = allocBlock_sync();
        }
        initBdescr(bd, ws->gen, ws->gen->to);
        bd->flags = BF_EVACUATED;
        bd->u.scan = bd->free = bd->start;
    }

    bd->link = NULL;

    ws->todo_bd = bd;
    ws->todo_free = bd->free;
    ws->todo_lim  = stg_min(bd->start + bd->blocks * BLOCK_SIZE_W,
                            bd->free + stg_max(WORK_UNIT_WORDS,size));

    debugTrace(DEBUG_gc, "alloc new todo block %p for gen %d",
               bd->free, ws->gen->no);

    return ws->todo_free;
}
StgPtr
alloc_todo_block (gen_workspace *ws, uint32_t size)
{
    bdescr *bd/*, *hd, *tl */;

    // Grab a part block if we have one, and it has enough room
    bd = ws->part_list;
    if (bd != NULL &&
        bd->start + bd->blocks * BLOCK_SIZE_W - bd->free > (int)size)
    {
        ws->part_list = bd->link;
        ws->n_part_blocks -= bd->blocks;
        ws->n_part_words -= bd->free - bd->start;
    }
    else
    {
        if (size > BLOCK_SIZE_W) {
            bd = allocGroup_sync((W_)BLOCK_ROUND_UP(size*sizeof(W_))
                                 / BLOCK_SIZE);
        } else {
            if (gct->free_blocks) {
                bd = gct->free_blocks;
                gct->free_blocks = bd->link;
            } else {
                allocBlocks_sync(16, &bd);
                gct->free_blocks = bd->link;
            }
        }
        // blocks in to-space get the BF_EVACUATED flag.
        bd->flags = BF_EVACUATED;
        bd->u.scan = bd->start;
        initBdescr(bd, ws->gen, ws->gen->to);
    }

    bd->link = NULL;

    ws->todo_bd = bd;
    ws->todo_free = bd->free;
    ws->todo_lim  = stg_min(bd->start + bd->blocks * BLOCK_SIZE_W,
                            bd->free + stg_max(WORK_UNIT_WORDS,size));
                            // See Note [big objects]

    debugTrace(DEBUG_gc, "alloc new todo block %p for gen %d",
               bd->free, ws->gen->no);

    return ws->todo_free;
}
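// Illustration (not part of the RTS): the todo_lim arithmetic used above,
// pulled out as a tiny standalone function with assumed example values for
// WORK_UNIT_WORDS and the block size.  The limit is deliberately set below
// the end of the block, so the copying loop trips into todo_block_full()
// after roughly one work unit and gets a chance to push the partially
// filled block out to other GC threads.

#include <stdint.h>
#include <stdio.h>

#define EX_BLOCK_SIZE_W    (4096 / sizeof(uintptr_t)) /* assumed block size in words */
#define EX_WORK_UNIT_WORDS 128                        /* assumed work-unit size      */

static uintptr_t *todo_limit(uintptr_t *start, size_t blocks,
                             uintptr_t *free, size_t size)
{
    uintptr_t *block_end = start + blocks * EX_BLOCK_SIZE_W;
    size_t step = size > EX_WORK_UNIT_WORDS ? size : EX_WORK_UNIT_WORDS;
    uintptr_t *lim = free + step;
    return lim < block_end ? lim : block_end;         /* the stg_min above */
}

int main(void)
{
    uintptr_t block[EX_BLOCK_SIZE_W];
    uintptr_t *lim = todo_limit(block, 1, block, 8);
    printf("initial limit is %zu words into the block\n",
           (size_t)(lim - block));                    /* one work unit, not 512 */
    return 0;
}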
static void
scavenge_large_bitmap( StgPtr p, StgLargeBitmap *large_bitmap, nat size )
{
    nat i, j, b;
    StgWord bitmap;

    b = 0;

    for (i = 0; i < size; b++) {
        bitmap = large_bitmap->bitmap[b];
        j = stg_min(size-i, BITS_IN(W_));
        i += j;
        for (; j > 0; j--, p++) {
            if ((bitmap & 1) == 0) {
                evacuate((StgClosure **)p);
            }
            bitmap = bitmap >> 1;
        }
    }
}
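// Illustration (not part of the RTS): a minimal sketch of how the large
// bitmap above is interpreted.  Following the loop above, a 0 bit marks a
// pointer word and a 1 bit marks a non-pointer word; the bitmap is split
// across machine words, with 'size' counting payload words, not bitmap
// words.  count_pointer_words is a hypothetical helper for the example.

#include <stdint.h>
#include <stdio.h>

#define WORD_BITS (8 * sizeof(uintptr_t))

// Count how many of the first 'size' payload words are pointers according
// to a multi-word bitmap, mirroring the traversal in scavenge_large_bitmap.
static size_t count_pointer_words(const uintptr_t *bitmap_words, size_t size)
{
    size_t count = 0, i = 0, b = 0;
    while (i < size) {
        uintptr_t bitmap = bitmap_words[b++];
        size_t j = size - i < WORD_BITS ? size - i : WORD_BITS;
        i += j;
        for (; j > 0; j--) {
            if ((bitmap & 1) == 0) count++;   /* 0 bit == pointer */
            bitmap >>= 1;
        }
    }
    return count;
}

int main(void)
{
    uintptr_t bm[2] = { 0x5 /* payload words 0 and 2 are non-pointers */, 0x0 };
    printf("%zu pointer words\n", count_pointer_words(bm, 70));
    return 0;
}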
static void
gtc_heap_view_closure_ptrs_in_large_bitmap(StgClosure *ptrs[], StgWord *nptrs,
                                           StgClosure **p,
                                           StgLargeBitmap *large_bitmap,
                                           nat size)
{
    nat i, j, b;
    StgWord bitmap;

    b = 0;

    for (i = 0; i < size; b++) {
        bitmap = large_bitmap->bitmap[b];
        j = stg_min(size-i, BITS_IN(W_));
        i += j;
        for (; j > 0; j--, p++) {
            if ((bitmap & 1) == 0) {
                ptrs[(*nptrs)++] = *p;
            }
            bitmap = bitmap >> 1;
        }
    }
}
StgPtr
todo_block_full (uint32_t size, gen_workspace *ws)
{
    bool urgent_to_push, can_extend;
    StgPtr p;
    bdescr *bd;

    // todo_free has been pre-incremented by Evac.c:alloc_for_copy().  We
    // are expected to leave it bumped when we've finished here.
    ws->todo_free -= size;

    bd = ws->todo_bd;

    ASSERT(bd != NULL);
    ASSERT(bd->link == NULL);
    ASSERT(bd->gen == ws->gen);

    // We intentionally set ws->todo_lim lower than the full size of
    // the block, so that we can push out some work to the global list
    // and get the parallel threads working as soon as possible.
    //
    // So when ws->todo_lim is reached, we end up here and have to
    // decide whether it's worth pushing out the work we have or not.
    // If we have enough room in the block to evacuate the current
    // object, and it's not urgent to push this work, then we just
    // extend the limit and keep going.  Where "urgent" is defined as:
    // the global pool is empty, and there's enough work in this block
    // to make it worth pushing.
    //
    urgent_to_push =
        looksEmptyWSDeque(ws->todo_q) &&
        (ws->todo_free - bd->u.scan >= WORK_UNIT_WORDS / 2);

    // We can extend the limit for the current block if there's enough
    // room for the current object, *and* we're not into the second or
    // subsequent block of a large block (see Note [big objects]).
    can_extend =
        ws->todo_free + size <= bd->start + bd->blocks * BLOCK_SIZE_W
        && ws->todo_free < ws->todo_bd->start + BLOCK_SIZE_W;

    if (!urgent_to_push && can_extend)
    {
        ws->todo_lim = stg_min(bd->start + bd->blocks * BLOCK_SIZE_W,
                               ws->todo_lim + stg_max(WORK_UNIT_WORDS,size));
        debugTrace(DEBUG_gc, "increasing limit for %p to %p",
                   bd->start, ws->todo_lim);
        p = ws->todo_free;
        ws->todo_free += size;

        return p;
    }

    gct->copied += ws->todo_free - bd->free;
    bd->free = ws->todo_free;

    ASSERT(bd->u.scan >= bd->start && bd->u.scan <= bd->free);

    // If this block is not the scan block, we want to push it out and
    // make room for a new todo block.
    if (bd != gct->scan_bd)
    {
        // If this block does not have enough space to allocate the
        // current object, but it also doesn't have any work to push, then
        // push it on to the scanned list.
        if (bd->u.scan == bd->free)
        {
            if (bd->free == bd->start) {
                // Normally the block would not be empty, because then
                // there would be enough room to copy the current
                // object.  However, if the object we're copying is
                // larger than a block, then we might have an empty
                // block here.
                freeGroup_sync(bd);
            } else {
                push_scanned_block(bd, ws);
            }
        }
        // Otherwise, push this block out to the global list.
        else
        {
            DEBUG_ONLY( generation *gen );
            DEBUG_ONLY( gen = ws->gen );
            debugTrace(DEBUG_gc, "push todo block %p (%ld words), step %d, todo_q: %ld",
                       bd->start, (unsigned long)(bd->free - bd->u.scan),
                       gen->no, dequeElements(ws->todo_q));

            if (!pushWSDeque(ws->todo_q, bd)) {
                bd->link = ws->todo_overflow;
                ws->todo_overflow = bd;
                ws->n_todo_overflow++;
            }
        }
    }

    ws->todo_bd   = NULL;
    ws->todo_free = NULL;
    ws->todo_lim  = NULL;

    alloc_todo_block(ws, size);

    p = ws->todo_free;
    ws->todo_free += size;
    return p;
}
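// Illustration (not part of the RTS): the push-or-extend decision above,
// reduced to a standalone predicate over plain word counts.  The constant
// and the helper name are assumed example values.  "Urgent" means the
// shared deque looks empty and this block already holds at least half a
// work unit of unscanned words; in that case the block is pushed so idle
// GC threads can pick it up, otherwise the limit is simply extended and
// copying continues in the same block.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define EX_WORK_UNIT_WORDS 128   /* assumed work-unit size */

static bool should_push(bool deque_looks_empty,
                        size_t unscanned_words,   /* todo_free - scan        */
                        size_t room_left_words,   /* block end - todo_free   */
                        size_t object_words,      /* size of the next object */
                        bool   in_first_block)    /* see Note [big objects]  */
{
    bool urgent_to_push = deque_looks_empty
                       && unscanned_words >= EX_WORK_UNIT_WORDS / 2;
    bool can_extend     = object_words <= room_left_words && in_first_block;
    return urgent_to_push || !can_extend;
}

int main(void)
{
    /* Plenty of room and the deque is non-empty: keep filling this block. */
    printf("%d\n", should_push(false, 200, 500, 16, true));   /* 0 */
    /* Deque empty and half a work unit accumulated: push the block out.   */
    printf("%d\n", should_push(true, 100, 500, 16, true));    /* 1 */
    return 0;
}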