static void
searchStackChunk (HashTable *addrs, StgPtr sp, StgPtr stack_end)
{
    StgPtr p;
    const StgRetInfoTable *info;

    p = sp;
    while (p < stack_end) {
        info = get_ret_itbl((StgClosure *)p);

        switch (info->i.type) {
        case RET_SMALL:
        case RET_BIG:
            checkAddress(addrs, (void*)info);
            break;

        default:
            break;
        }

        p += stack_frame_sizeW((StgClosure*)p);
    }
}
static void
stackSqueeze(Capability *cap, StgTSO *tso, StgPtr bottom)
{
    StgPtr frame;
    uint32_t adjacent_update_frames;
    struct stack_gap *gap;

    // Stage 1:
    //    Traverse the stack upwards, replacing adjacent update frames
    //    with a single update frame and a "stack gap".  A stack gap
    //    contains two values: the size of the gap, and the distance
    //    to the next gap (or the stack top).

    frame = tso->stackobj->sp;

    ASSERT(frame < bottom);

    adjacent_update_frames = 0;
    gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame));

    while (frame <= bottom)
    {
        switch (get_ret_itbl((StgClosure *)frame)->i.type)
        {
        case UPDATE_FRAME:
        {
            if (adjacent_update_frames > 0) {
                TICK_UPD_SQUEEZED();
            }
            adjacent_update_frames++;

            frame += sizeofW(StgUpdateFrame);
            continue;
        }

        default:
            // we're not in a gap... check whether this is the end of a gap
            // (an update frame can't be the end of a gap).
            if (adjacent_update_frames > 1) {
                gap = updateAdjacentFrames(cap, tso,
                                           (StgUpdateFrame*)(frame - sizeofW(StgUpdateFrame)),
                                           adjacent_update_frames, gap);
            }
            adjacent_update_frames = 0;

            frame += stack_frame_sizeW((StgClosure *)frame);
            continue;
        }
    }

    if (adjacent_update_frames > 1) {
        gap = updateAdjacentFrames(cap, tso,
                                   (StgUpdateFrame*)(frame - sizeofW(StgUpdateFrame)),
                                   adjacent_update_frames, gap);
    }

    // Now we have a stack with gap-structs in it, and we have to walk down
    // shoving the stack up to fill in the gaps.  A diagram might
    // help:
    //
    //    +| ********* |
    //     | ********* | <- sp
    //     |           |
    //     |           | <- gap_start
    //     | ......... |                |
    //     | stack_gap | <- gap         | chunk_size
    //     | ......... |                |
    //     | ......... | <- gap_end     v
    //     | ********* |
    //     | ********* |
    //     | ********* |
    //    -| ********* |
    //
    // 'sp'  points to the current top-of-stack
    // 'gap' points to the stack_gap structure inside the gap
    // *****   indicates real stack data
    // .....   indicates gap
    // <empty> indicates unused
    //
    {
        StgWord8 *sp;
        StgWord8 *gap_start, *next_gap_start, *gap_end;
        uint32_t chunk_size;

        next_gap_start = (StgWord8*)gap + sizeof(StgUpdateFrame);
        sp = next_gap_start;

        while ((StgPtr)gap > tso->stackobj->sp) {

            // we're working in *bytes* now...
            gap_start = next_gap_start;
            gap_end = gap_start - gap->gap_size * sizeof(W_);

            gap = gap->next_gap;
            next_gap_start = (StgWord8*)gap + sizeof(StgUpdateFrame);

            chunk_size = gap_end - next_gap_start;
            sp -= chunk_size;
            memmove(sp, next_gap_start, chunk_size);
        }

        tso->stackobj->sp = (StgPtr)sp;
    }
}
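/* A minimal, self-contained sketch (not RTS code) of the Stage 2 walk
 * above: slide each chunk of live data between two gaps down the stack
 * (toward the bottom) so the gaps close up.  It simplifies freely: the
 * stack is a plain word array indexed from the top, the in-stack linked
 * list of struct stack_gap records becomes an array of (start, size)
 * index pairs sorted from the shallowest gap to the deepest, and the
 * retained update frames are ignored.  All demo_* names are invented
 * for the illustration.
 */
#include <stdint.h>
#include <string.h>

typedef struct { size_t start; size_t size; } demo_gap; /* [start, start+size) */

/* stk[top .. len) is live except for the gap regions.  Index 0 is the
 * top of the stack; higher indices are deeper.  Returns the new top. */
static size_t demo_squeeze(uintptr_t *stk, size_t top,
                           const demo_gap *gaps, size_t ngaps)
{
    if (ngaps == 0) return top;

    /* Data below the deepest gap never moves; start the destination
     * cursor at that gap's deep edge and walk the gaps from deepest to
     * shallowest, sliding each live chunk down onto the space freed. */
    size_t dst = gaps[ngaps - 1].start + gaps[ngaps - 1].size;
    for (size_t i = ngaps; i-- > 0; ) {
        size_t src_end = gaps[i].start;                   /* shallow edge of gap i  */
        size_t src     = (i == 0) ? top
                                  : gaps[i - 1].start + gaps[i - 1].size;
        size_t chunk   = src_end - src;                   /* live words above gap i */
        dst -= chunk;
        memmove(&stk[dst], &stk[src], chunk);             /* regions may overlap    */
    }
    return dst;  /* the squeezed stack now occupies stk[dst .. len) */
}
/* Processing deepest-first means every destination region is either a
 * gap or source data that has already been copied, which is the same
 * reason the real walk can move chunks in place with one pass. */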
/* -----------------------------------------------------------------------------
 * Pausing a thread
 *
 * We have to prepare for GC - this means doing lazy black holing
 * here.  We also take the opportunity to do stack squeezing if it's
 * turned on.
 * -------------------------------------------------------------------------- */

void
threadPaused(Capability *cap, StgTSO *tso)
{
    StgClosure *frame;
    const StgRetInfoTable *info;
    const StgInfoTable *bh_info;
    const StgInfoTable *cur_bh_info USED_IF_THREADS;
    StgClosure *bh;
    StgPtr stack_end;
    uint32_t words_to_squeeze = 0;
    uint32_t weight           = 0;
    uint32_t weight_pending   = 0;
    bool prev_was_update_frame = false;
    StgWord heuristic_says_squeeze;

    // Check to see whether we have threads waiting to raise
    // exceptions, and we're not blocking exceptions, or are blocked
    // interruptibly.  This is important; if a thread is running with
    // TSO_BLOCKEX and becomes blocked interruptibly, this is the only
    // place we ensure that the blocked_exceptions get a chance.
    maybePerformBlockedException (cap, tso);
    if (tso->what_next == ThreadKilled) { return; }

    // NB. Updatable thunks *must* be blackholed, either by eager blackholing or
    // lazy blackholing.  See Note [upd-black-hole] in sm/Scav.c.

    stack_end = tso->stackobj->stack + tso->stackobj->stack_size;

    frame = (StgClosure *)tso->stackobj->sp;

    while ((P_)frame < stack_end) {
        info = get_ret_itbl(frame);

        switch (info->i.type) {

        case UPDATE_FRAME:

            // If we've already marked this frame, then stop here.
            if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) {
                if (prev_was_update_frame) {
                    words_to_squeeze += sizeofW(StgUpdateFrame);
                    weight += weight_pending;
                    weight_pending = 0;
                }
                goto end;
            }

            SET_INFO(frame, (StgInfoTable *)&stg_marked_upd_frame_info);

            bh = ((StgUpdateFrame *)frame)->updatee;
            bh_info = bh->header.info;

#if defined(THREADED_RTS)
        retry:
#endif
            // Note [suspend duplicate work]
            //
            // If the info table is a WHITEHOLE or a BLACKHOLE, then
            // another thread has claimed it (via the SET_INFO()
            // below), or is in the process of doing so.  In that case
            // we want to suspend the work that the current thread has
            // done on this thunk and wait until the other thread has
            // finished.
            //
            // If eager blackholing is taking place, it could be the
            // case that the blackhole points to the current
            // TSO. e.g.:
            //
            //    this thread                   other thread
            //    --------------------------------------------------------
            //                                  c->indirectee = other_tso;
            //                                  c->header.info = EAGER_BH
            //                                  threadPaused():
            //                                    c->header.info = WHITEHOLE
            //                                    c->indirectee = other_tso
            //    c->indirectee = this_tso;
            //    c->header.info = EAGER_BH
            //                                    c->header.info = BLACKHOLE
            //    threadPaused()
            //    *** c->header.info is now BLACKHOLE,
            //        c->indirectee points to this_tso
            //
            // So in this case do *not* suspend the work of the
            // current thread, because the current thread will become
            // deadlocked on itself.  See #5226 for an instance of
            // this bug.
            //
            // Note that great care is required when entering computations
            // suspended by this mechanism. See Note [AP_STACKs must be eagerly
            // blackholed] for details.
            if (((bh_info == &stg_BLACKHOLE_info)
                 && ((StgInd*)bh)->indirectee != (StgClosure*)tso)
                || (bh_info == &stg_WHITEHOLE_info))
            {
                debugTrace(DEBUG_squeeze,
                           "suspending duplicate work: %ld words of stack",
                           (long)((StgPtr)frame - tso->stackobj->sp));

                // If this closure is already an indirection, then
                // suspend the computation up to this point.
                // NB. check raiseAsync() to see what happens when
                // we're in a loop (#2783).
                suspendComputation(cap,tso,(StgUpdateFrame*)frame);

                // Now drop the update frame, and arrange to return
                // the value to the frame underneath:
                tso->stackobj->sp = (StgPtr)frame + sizeofW(StgUpdateFrame) - 2;
                tso->stackobj->sp[1] = (StgWord)bh;
                ASSERT(bh->header.info != &stg_TSO_info);
                tso->stackobj->sp[0] = (W_)&stg_enter_info;

                // And continue with threadPaused; there might be
                // yet more computation to suspend.
                frame = (StgClosure *)(tso->stackobj->sp + 2);
                prev_was_update_frame = false;
                continue;
            }

            // We should never have made it here in the event of blackholes that
            // we already own; they should have been marked when we blackholed
            // them and consequently we should have stopped our stack walk
            // above.
            ASSERT(!((bh_info == &stg_BLACKHOLE_info)
                     && (((StgInd*)bh)->indirectee == (StgClosure*)tso)));

            // zero out the slop so that the sanity checker can tell
            // where the next closure is.
            OVERWRITING_CLOSURE(bh);

            // an EAGER_BLACKHOLE or CAF_BLACKHOLE gets turned into a
            // BLACKHOLE here.
#if defined(THREADED_RTS)
            // first we turn it into a WHITEHOLE to claim it, and if
            // successful we write our TSO and then the BLACKHOLE info pointer.
            cur_bh_info = (const StgInfoTable *)
                cas((StgVolatilePtr)&bh->header.info,
                    (StgWord)bh_info,
                    (StgWord)&stg_WHITEHOLE_info);

            if (cur_bh_info != bh_info) {
                bh_info = cur_bh_info;
                goto retry;
            }
#endif

            // The payload of the BLACKHOLE points to the TSO
            ((StgInd *)bh)->indirectee = (StgClosure *)tso;
            write_barrier();
            SET_INFO(bh,&stg_BLACKHOLE_info);

            // .. and we need a write barrier, since we just mutated the closure:
            recordClosureMutated(cap,bh);

            // We pretend that bh has just been created.
            LDV_RECORD_CREATE(bh);

            frame = (StgClosure *) ((StgUpdateFrame *)frame + 1);
            if (prev_was_update_frame) {
                words_to_squeeze += sizeofW(StgUpdateFrame);
                weight += weight_pending;
                weight_pending = 0;
            }
            prev_was_update_frame = true;
            break;

        case UNDERFLOW_FRAME:
        case STOP_FRAME:
            goto end;

            // normal stack frames; do nothing except advance the pointer
        default:
        {
            uint32_t frame_size = stack_frame_sizeW(frame);
            weight_pending += frame_size;
            frame = (StgClosure *)((StgPtr)frame + frame_size);
            prev_was_update_frame = false;
        }
        }
    }

end:
    // Should we squeeze or not?  Arbitrary heuristic: we squeeze if
    // the number of words we have to shift down is less than the
    // number of stack words we squeeze away by doing so.
    // The threshold was bumped from 5 to 8 as a result of #2797
    heuristic_says_squeeze = ((weight <= 8 && words_to_squeeze > 0)
                              || weight < words_to_squeeze);

    debugTrace(DEBUG_squeeze,
               "words_to_squeeze: %d, weight: %d, squeeze: %s",
               words_to_squeeze, weight,
               heuristic_says_squeeze ? "YES" : "NO");

    if (RtsFlags.GcFlags.squeezeUpdFrames == true && heuristic_says_squeeze) {
        stackSqueeze(cap, tso, (StgPtr)frame);
        tso->flags |= TSO_SQUEEZED;
        // This flag tells threadStackOverflow() that the stack was
        // squeezed, because it may not need to be expanded.
    } else {
        tso->flags &= ~TSO_SQUEEZED;
    }
}
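/* A minimal, self-contained sketch (not RTS code) of the claim protocol
 * in the THREADED_RTS block above: swing the header to a WHITEHOLE with
 * a compare-and-swap to claim the thunk, then publish the owner and the
 * BLACKHOLE header, with a release barrier between the two stores so no
 * other thread can see a BLACKHOLE header with a stale indirectee.  All
 * demo_* names and types are invented; the RTS uses its own cas() and
 * write_barrier() primitives on real closure headers.
 */
#include <stdatomic.h>
#include <stdbool.h>

static const char demo_whitehole, demo_blackhole;   /* unique sentinel addresses */
#define DEMO_WHITEHOLE ((const void *)&demo_whitehole)
#define DEMO_BLACKHOLE ((const void *)&demo_blackhole)

typedef struct demo_closure {
    _Atomic(const void *) header;      /* stands in for bh->header.info            */
    _Atomic(void *)       indirectee;  /* stands in for ((StgInd*)bh)->indirectee  */
} demo_closure;

/* Try to blackhole 'c' on behalf of 'owner_tso'.  Returns false if some
 * other thread changed the header first (the analogue of the CAS
 * failing and the code above taking 'goto retry'). */
static bool demo_claim_and_blackhole(demo_closure *c, void *owner_tso,
                                     const void *expected_header)
{
    const void *expected = expected_header;

    /* Claim: header := WHITEHOLE, but only if nobody beat us to it. */
    if (!atomic_compare_exchange_strong(&c->header, &expected, DEMO_WHITEHOLE)) {
        return false;  /* caller re-reads the header and retries or suspends */
    }

    /* We own it: write the payload first ... */
    atomic_store_explicit(&c->indirectee, owner_tso, memory_order_relaxed);

    /* ... then publish the BLACKHOLE header with release ordering, the
     * moral equivalent of write_barrier(); SET_INFO(...) above. */
    atomic_store_explicit(&c->header, DEMO_BLACKHOLE, memory_order_release);
    return true;
}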
static void
thread_stack(StgPtr p, StgPtr stack_end)
{
    const StgRetInfoTable* info;
    StgWord bitmap;
    nat size;

    // highly similar to scavenge_stack, but we do pointer threading here.

    while (p < stack_end) {

        // *p must be the info pointer of an activation
        // record.  All activation records have 'bitmap' style layout
        // info.
        //
        info  = get_ret_itbl((StgClosure *)p);

        switch (info->i.type) {

            // Dynamic bitmap: the mask is stored on the stack
        case RET_DYN:
        {
            StgWord dyn;
            dyn = ((StgRetDyn *)p)->liveness;

            // traverse the bitmap first
            bitmap = RET_DYN_LIVENESS(dyn);
            p      = (P_)&((StgRetDyn *)p)->payload[0];
            size   = RET_DYN_BITMAP_SIZE;
            while (size > 0) {
                if ((bitmap & 1) == 0) {
                    thread((StgClosure **)p);
                }
                p++;
                bitmap = bitmap >> 1;
                size--;
            }

            // skip over the non-ptr words
            p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;

            // follow the ptr words
            for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
                thread((StgClosure **)p);
                p++;
            }
            continue;
        }

            // small bitmap (<= 32 entries, or 64 on a 64-bit machine)
        case UPDATE_FRAME:
        case STOP_FRAME:
        case CATCH_FRAME:
        case RET_SMALL:
            bitmap = BITMAP_BITS(info->i.layout.bitmap);
            size   = BITMAP_SIZE(info->i.layout.bitmap);
            p++;
            // NOTE: the payload starts immediately after the info-ptr, we
            // don't have an StgHeader in the same sense as a heap closure.
            while (size > 0) {
                if ((bitmap & 1) == 0) {
                    thread((StgClosure **)p);
                }
                p++;
                bitmap = bitmap >> 1;
                size--;
            }
            continue;

#ifdef ALLOW_INTERPRETER
        case RET_BCO: {
            StgBCO *bco;
            nat size;

            p++;
            bco = (StgBCO *)*p;
            thread((StgClosure **)p);
            p++;
            size = BCO_BITMAP_SIZE(bco);
            thread_large_bitmap(p, BCO_BITMAP(bco), size);
            p += size;
            continue;
        }
#endif // ALLOW_INTERPRETER

            // large bitmap (> 32 entries, or 64 on a 64-bit machine)
        case RET_BIG:
            p++;
            size = GET_LARGE_BITMAP(&info->i)->size;
            thread_large_bitmap(p, GET_LARGE_BITMAP(&info->i), size);
            p += size;
            continue;

        case RET_FUN:
        {
            StgRetFun *ret_fun = (StgRetFun *)p;
            StgFunInfoTable *fun_info;

            fun_info = FUN_INFO_PTR_TO_STRUCT(UNTAG_CLOSURE((StgClosure *)
                           get_threaded_info((StgPtr)ret_fun->fun)));
                 // *before* threading it!
            thread(&ret_fun->fun);
            p = thread_arg_block(fun_info, ret_fun->payload);
            continue;
        }

        default:
            barf("thread_stack: weird activation record found on stack: %d",
                 (int)(info->i.type));
        }
    }
}
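/* A self-contained sketch (not RTS code) of the small-bitmap walk that
 * thread_stack, scavenge_stack and checkStackFrame all share: the
 * bitmap describes the frame's payload one bit per word, least
 * significant bit first, and, as the loops above show, a 0 bit marks a
 * pointer while a 1 bit marks a non-pointer word.  The demo_* names are
 * invented; the visitor callback stands in for thread(), evacuate() or
 * the sanity checks.
 */
#include <stdint.h>
#include <stddef.h>

typedef void (*demo_visit_fn)(uintptr_t **slot);

static uintptr_t *demo_walk_small_bitmap(uintptr_t *payload, size_t size,
                                         uintptr_t bitmap, demo_visit_fn visit)
{
    while (size > 0) {
        if ((bitmap & 1) == 0) {
            visit((uintptr_t **)payload);  /* pointer slot: follow it */
        }                                  /* else: raw word, skip it */
        payload++;
        bitmap >>= 1;
        size--;
    }
    return payload;  /* first word past the bitmapped area */
}
/* e.g. bitmap 0b0110 over a 4-word payload visits words 0 and 3 as
 * pointers and skips words 1 and 2 as raw data. */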
// check an individual stack object
StgOffset
checkStackFrame( StgPtr c )
{
    nat size;
    const StgRetInfoTable* info;

    info = get_ret_itbl((StgClosure *)c);

    /* All activation records have 'bitmap' style layout info. */
    switch (info->i.type) {

    case RET_DYN: /* Dynamic bitmap: the mask is stored on the stack */
    {
        StgWord dyn;
        StgPtr p;
        StgRetDyn* r;

        r = (StgRetDyn *)c;
        dyn = r->liveness;

        p = (P_)(r->payload);
        checkSmallBitmap(p,RET_DYN_LIVENESS(r->liveness),RET_DYN_BITMAP_SIZE);
        p += RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE;

        // skip over the non-pointers
        p += RET_DYN_NONPTRS(dyn);

        // follow the ptr words
        for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
            checkClosureShallow((StgClosure *)*p);
            p++;
        }

        return sizeofW(StgRetDyn) + RET_DYN_BITMAP_SIZE +
               RET_DYN_NONPTR_REGS_SIZE +
               RET_DYN_NONPTRS(dyn) + RET_DYN_PTRS(dyn);
    }

    case UPDATE_FRAME:
        ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgUpdateFrame*)c)->updatee));
        /* fall through */
    case ATOMICALLY_FRAME:
    case CATCH_RETRY_FRAME:
    case CATCH_STM_FRAME:
    case CATCH_FRAME:
        // small bitmap cases (<= 32 entries)
    case UNDERFLOW_FRAME:
    case STOP_FRAME:
    case RET_SMALL:
        size = BITMAP_SIZE(info->i.layout.bitmap);
        checkSmallBitmap((StgPtr)c + 1,
                         BITMAP_BITS(info->i.layout.bitmap), size);
        return 1 + size;

    case RET_BCO: {
        StgBCO *bco;
        nat size;
        bco = (StgBCO *)*(c+1);
        size = BCO_BITMAP_SIZE(bco);
        checkLargeBitmap((StgPtr)c + 2, BCO_BITMAP(bco), size);
        return 2 + size;
    }

    case RET_BIG: // large bitmap (> 32 entries)
        size = GET_LARGE_BITMAP(&info->i)->size;
        checkLargeBitmap((StgPtr)c + 1, GET_LARGE_BITMAP(&info->i), size);
        return 1 + size;

    case RET_FUN:
    {
        StgFunInfoTable *fun_info;
        StgRetFun *ret_fun;

        ret_fun = (StgRetFun *)c;
        fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun));
        size = ret_fun->size;
        switch (fun_info->f.fun_type) {
        case ARG_GEN:
            checkSmallBitmap((StgPtr)ret_fun->payload,
                             BITMAP_BITS(fun_info->f.b.bitmap), size);
            break;
        case ARG_GEN_BIG:
            checkLargeBitmap((StgPtr)ret_fun->payload,
                             GET_FUN_LARGE_BITMAP(fun_info), size);
            break;
        default:
            checkSmallBitmap((StgPtr)ret_fun->payload,
                             BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]),
                             size);
            break;
        }
        return sizeofW(StgRetFun) + size;
    }

    default:
        barf("checkStackFrame: weird activation record found on stack (%p %d).",
             c, info->i.type);
    }
}
static void
scavenge_stack(StgPtr p, StgPtr stack_end)
{
    const StgRetInfoTable* info;
    StgWord bitmap;
    nat size;

    /*
     * Each time around this loop, we are looking at a chunk of stack
     * that starts with an activation record.
     */
    while (p < stack_end) {
        info  = get_ret_itbl((StgClosure *)p);

        switch (info->i.type) {

        case UPDATE_FRAME:
            // In SMP, we can get update frames that point to indirections
            // when two threads evaluate the same thunk.  We do attempt to
            // discover this situation in threadPaused(), but it's
            // possible that the following sequence occurs:
            //
            //        A             B
            //                  enter T
            //     enter T
            //     blackhole T
            //                  update T
            //     GC
            //
            // Now T is an indirection, and the update frame is already
            // marked on A's stack, so we won't traverse it again in
            // threadPaused().  We could traverse the whole stack again
            // before GC, but that seems like overkill.
            //
            // Scavenging this update frame as normal would be disastrous;
            // the updatee would end up pointing to the value.  So we
            // check whether the value after evacuation is a BLACKHOLE,
            // and if not, we change the update frame to an stg_enter
            // frame that simply returns the value.  Hence, blackholing is
            // compulsory (otherwise we would have to check for thunks
            // too).
            //
            // Note [upd-black-hole]
            // One slight hiccup is that the THUNK_SELECTOR machinery can
            // overwrite the updatee with an IND.  In parallel GC, this
            // could even be happening concurrently, so we can't check for
            // the IND.  Fortunately if we assume that blackholing is
            // happening (either lazy or eager), then we can be sure that
            // the updatee is never a THUNK_SELECTOR and we're ok.
            // NB. this is a new invariant: blackholing is not optional.
        {
            StgUpdateFrame *frame = (StgUpdateFrame *)p;
            StgClosure *v;

            evacuate(&frame->updatee);
            v = frame->updatee;
            if (GET_CLOSURE_TAG(v) != 0 ||
                (get_itbl(v)->type != BLACKHOLE)) {
                // blackholing is compulsory, see above.
                frame->header.info = (const StgInfoTable*)&stg_enter_checkbh_info;
            }
            ASSERT(v->header.info != &stg_TSO_info);
            p += sizeofW(StgUpdateFrame);
            continue;
        }

            // small bitmap (< 32 entries, or 64 on a 64-bit machine)
        case CATCH_STM_FRAME:
        case CATCH_RETRY_FRAME:
        case ATOMICALLY_FRAME:
        case UNDERFLOW_FRAME:
        case STOP_FRAME:
        case CATCH_FRAME:
        case RET_SMALL:
            bitmap = BITMAP_BITS(info->i.layout.bitmap);
            size   = BITMAP_SIZE(info->i.layout.bitmap);
            // NOTE: the payload starts immediately after the info-ptr, we
            // don't have an StgHeader in the same sense as a heap closure.
            p++;
            p = scavenge_small_bitmap(p, size, bitmap);

        follow_srt:
            if (major_gc)
                scavenge_srt((StgClosure **)GET_SRT(info), info->i.srt_bitmap);
            continue;

        case RET_BCO: {
            StgBCO *bco;
            nat size;

            p++;
            evacuate((StgClosure **)p);
            bco = (StgBCO *)*p;
            p++;
            size = BCO_BITMAP_SIZE(bco);
            scavenge_large_bitmap(p, BCO_BITMAP(bco), size);
            p += size;
            continue;
        }

            // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
        case RET_BIG:
        {
            nat size;

            size = GET_LARGE_BITMAP(&info->i)->size;
            p++;
            scavenge_large_bitmap(p, GET_LARGE_BITMAP(&info->i), size);
            p += size;
            // and don't forget to follow the SRT
            goto follow_srt;
        }

            // Dynamic bitmap: the mask is stored on the stack, and
            // there are a number of non-pointers followed by a number
            // of pointers above the bitmapped area.  (see StgMacros.h,
            // HEAP_CHK_GEN).
        case RET_DYN:
        {
            StgWord dyn;
            dyn = ((StgRetDyn *)p)->liveness;

            // traverse the bitmap first
            bitmap = RET_DYN_LIVENESS(dyn);
            p      = (P_)&((StgRetDyn *)p)->payload[0];
            size   = RET_DYN_BITMAP_SIZE;
            p = scavenge_small_bitmap(p, size, bitmap);

            // skip over the non-ptr words
            p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;

            // follow the ptr words
            for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
                evacuate((StgClosure **)p);
                p++;
            }
            continue;
        }

        case RET_FUN:
        {
            StgRetFun *ret_fun = (StgRetFun *)p;
            StgFunInfoTable *fun_info;

            evacuate(&ret_fun->fun);
            fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun));
            p = scavenge_arg_block(fun_info, ret_fun->payload);
            goto follow_srt;
        }

        default:
            barf("scavenge_stack: weird activation record found on stack: %d",
                 (int)(info->i.type));
        }
    }
}
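/* A self-contained sketch (not RTS code) of the pointer-tagging test
 * behind GET_CLOSURE_TAG(v) != 0 in the UPDATE_FRAME case above.  GHC
 * keeps a small tag in the low bits of closure pointers (closures are
 * word-aligned, so those bits are otherwise zero), and a non-zero tag
 * means the closure is already evaluated, so in particular it cannot be
 * a BLACKHOLE and the info-table check can be skipped.  The demo_*
 * names are invented for the illustration.
 */
#include <stdint.h>

#define DEMO_TAG_BITS  3   /* 3 low bits on a 64-bit machine, 2 on 32-bit */
#define DEMO_TAG_MASK  (((uintptr_t)1 << DEMO_TAG_BITS) - 1)

static inline unsigned demo_get_tag(const void *p)
{
    return (unsigned)((uintptr_t)p & DEMO_TAG_MASK);
}

static inline const void *demo_untag(const void *p)
{
    return (const void *)((uintptr_t)p & ~DEMO_TAG_MASK);
}

/* The shape of the test above: a tagged pointer is known-evaluated;
 * only an untagged one forces a look at the header via the analogue of
 * UNTAG_CLOSURE()/get_itbl(). */
static inline int demo_known_evaluated(const void *v)
{
    return demo_get_tag(v) != 0;
}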