/* Transfer up to numToFetch items from 'from' to 'to', handling overreach.
   Returns the number of items actually transferred (possibly 0).

   Concurrency: the space in 'from' is claimed with a single atomic
   FetchAndAdd on from->last, so several processors may call this on the
   same 'from' set simultaneously.  If the atomic decrement moves the
   cursor below from->data (an "overreach"), the request is trimmed and
   from->last is clamped back to from->data; the clamp is a plain store,
   which the original authors note is safe because no concurrent
   increments occur in this phase.
   NOTE(review): the byte offset uses sizeof(val_t) while the cursor
   arithmetic is in ptr_t units — this assumes sizeof(val_t) == sizeof(ptr_t);
   confirm against the type definitions. */
static int getFromSharedStack(Set_t* to, Set_t* from, long numToFetch)
{
  /* FetchAndAdd returns the pre-added value */
  ptr_t *oldFromCursor = (ptr_t *) FetchAndAdd(
      (long *)&from->last, -(sizeof(val_t) * numToFetch));
  ptr_t *newFromCursor = oldFromCursor - numToFetch;
  if (oldFromCursor <= from->data) {       /* Handle complete overreach:
                                              the stack was already empty */
    numToFetch = 0;
    /* Multiple processors might execute this; ok since there are no increments. */
    from->last = from->data;
  }
  else if (newFromCursor < from->data) {   /* Handle partial overreach */
    /* Fetching fewer items than requested */
    numToFetch -= (from->data - newFromCursor);
    /* Recompute newFromCursor */
    newFromCursor = oldFromCursor - numToFetch;
    assert(numToFetch > 0);
    /* Multiple processors might execute this; ok since there are no increments */
    from->last = from->data;
  }
  /* Copy the claimed items into the local set and advance its cursor. */
  memcpy(to->last, newFromCursor, sizeof(val_t) * numToFetch);
  to->last += numToFetch;
  assert(to->last < to->limit);   /* destination must not overflow */
  return numToFetch;
}
void release() { //Decrement ref count uval count = FetchAndAdd(&refCount, uval(-1LL)); if (count == 1) { //We were last decrementer delete this; } }
/* Push this processor's local work onto the shared stack.
   conditional == 0: a normal push (room 1) that also retires this
   processor's "has shared work" claim, decrementing ss->numLocalStack.
   conditional != 0: a conditional push (room 2); the caller must not
   currently hold a shared-work claim (lw->hasShared == 0).
   Returns nonzero iff exitRoom reports the shared stack empty
   (i.e. collection work is globally exhausted).
   NOTE(review): enterRoom/exitRoom appear to implement a rooms-style
   synchronization protocol — pushes and pops occupy different rooms;
   confirm against the threeRoom implementation. */
int pushSharedStack(int conditional, SharedStack_t* ss, LocalWork_t* lw)
{
  int empty;
  enterRoom(ss->threeRoom,conditional ? 2 : 1);
  helpPushSharedStack(ss, lw);
  if (!conditional) {
    if (lw->hasShared == 1) {
      /* Give up this processor's claim to outstanding local work. */
      FetchAndAdd(&ss->numLocalStack,-1);
      lw->hasShared = 0;
    }
  }
  else
    assert(lw->hasShared == 0);
  assert(ss->numLocalStack >= 0);
  empty = (exitRoom(ss->threeRoom) == StackEmpty);
  FetchAndAdd(&ss->numPush, 1);   /* statistics: count pushes */
  return empty;
}
/* Register this processor as holding local work at the start of a GC
   cycle, incrementing ss->numLocalStack.  The first processor to
   register (position == 0), when getNext is set, also moves the
   deferred nextBackObjs/nextBackLocs sets into the active
   backObjs/backLocs sets; this path requires ss->doubleProcess. */
void resetSharedStack(SharedStack_t* ss, LocalWork_t* lw, int getNext)
{
  int position;
  assert(lw->hasShared == 0);   /* must not already hold a claim */
  lw->hasShared = 1;
  /* FetchAndAdd returns the pre-increment count, so exactly one
     processor observes position == 0. */
  position = FetchAndAdd(&ss->numLocalStack, 1);
  if (position == 0 && getNext) {
    assert(ss->doubleProcess);
    SetTransfer(&ss->work.nextBackObjs, &ss->work.backObjs);
    SetTransfer(&ss->work.nextBackLocs, &ss->work.backLocs);
  }
}
/* Transfer all items from 'from' to 'to', then empty 'from'.
   Space in 'to' is claimed with a single atomic FetchAndAdd on to->last,
   so multiple processors may move work into the same shared set
   concurrently.  Dies if the claimed region runs past to->limit. */
static void moveToSharedStack(Set_t* to, Set_t* from)
{
  int numToTransfer = SetLength(from);
  /* FetchAndAdd returns the pre-added value, i.e. the start of our slot.
     Scale by sizeof(val_t) — not a hard-coded 4 — so the byte offset is
     correct when val_t is not 4 bytes; this matches getFromSharedStack,
     which also scales its FetchAndAdd by sizeof(val_t). */
  ptr_t *oldToCursor = (ptr_t *) FetchAndAdd(
      (long *)&to->last, sizeof(val_t) * numToTransfer);
  if (to->last >= to->limit && to->limit > to->data) {
    /* Pointer differences are ptrdiff_t: print as long, not %d. */
    fprintf(stderr,"Shared stack %lx of size %ld overflowed with %ld items\n",
	    (long)to, to->size, (long)(to->last - to->data));
    DIE("shared stack overflow");
  }
  memcpy(oldToCursor, from->first, sizeof(val_t) * numToTransfer);
  from->last = from->data;   /* mark 'from' empty */
}
/* Turn the concurrent collector off: flip all roots and globals to
   their replicas, then (on the designated "first" processor) finish
   heap accounting, swap spaces on a major GC, and decide whether the
   NEXT collection must be major.  All processors participate and are
   coordinated by weak/strong barriers; phases must not be reordered. */
static void CollectorOff(Proc_t *proc)
{
  Thread_t *threadIterator = NULL;
  int isFirst;
  int nextGCType = Minor;   /* GCType will be written to during this function for
			       the next GC and so we save its value here for reading */

  procChangeState(proc, GCWork, 608);
  proc->segmentType |= FlipOff;

  if (collectDiag >= 2)
    printf("Proc %d: entered CollectorOff\n", proc->procid);
  assert(SetIsEmpty(&proc->work.objs));   /* Local stack must be empty */
  assert(GCStatus == GCPendingOff);
  memBarrier();

  PadCopyRange(&proc->copyRange);   /* Pad so that paranoid check works */

  /* weakBarrier returns this processor's arrival order; arrival 0 is
     the designated "first" processor for the single-threaded steps. */
  isFirst = (weakBarrier(barriers,proc) == 0);
  if (isFirst) {
    ResetJob();   /* Reset counter so all user threads are scanned below */
  }
  strongBarrier(barriers,proc);

  /* Local stacks must be empty. */
  assert(isLocalWorkEmpty(&proc->work));

  /* Replace all roots with replica */
  if (isFirst)
    minor_global_scan(proc);   /* Even for a major GC since we already flipped
				  global locs tenured when GC started */
  while ((threadIterator = NextJob()) != NULL) {
    complete_root_scan(proc, threadIterator);
    if (threadIterator->request == MajorGCRequestFromC)   /* Runtime explicitly requests major GC */
      nextGCType = Major;
  }

  procChangeState(proc, GCWork, 611);
  proc->numRoot += SetLength(&proc->work.roots) + SetLength(&proc->work.globals);
  /* Flip every collected root location to point at the replica. */
  while (!SetIsEmpty(&proc->work.roots)) {
    ploc_t root = (ploc_t) SetPop(&proc->work.roots);
    flipRootLoc(GCType, root);
  }
  /* Globals are flipped through their duplicate (replica) location. */
  while (!SetIsEmpty(&proc->work.globals)) {
    ptr_t global = SetPop(&proc->work.globals);
    ploc_t replicaLoc = DupGlobal(global);
    flipRootLoc(GCType, replicaLoc);
  }
  FetchAndAdd(&totalReplicated,
	      proc->segUsage.bytesReplicated + proc->cycleUsage.bytesReplicated);
  strongBarrier(barriers,proc);

  /* Only the designated thread needs to perform the following */
  if (isFirst) {
    if (GCType == Minor) {
      double liveRatio = 0.0;
      int i, copied = 0;
      paranoid_check_all(nursery, fromSpace, fromSpace, NULL, largeSpace);
      minor_global_promote(proc);
      /* Sum bytes copied across all processors for the survival rate. */
      for (i=0; i<NumProc; i++) {
	Proc_t *p = getNthProc(i);;
	copied += bytesCopied(&p->cycleUsage) + bytesCopied(&p->segUsage);
      }
      liveRatio = (double) (copied) / (double) Heap_GetUsed(nursery);
      add_statistic(&minorSurvivalStatistic, liveRatio);
    }
    else { /* Major */
      discardNextSharedStack(workStack);   /* Discard nextBackObj/nextBackLocs on major GC */
      paranoid_check_all(nursery, fromSpace, toSpace, NULL, largeSpace);
      gc_large_endCollect();
      HeapAdjust2(totalRequest, totalUnused, totalReplicated, CollectionRate,
		  doAgressive ? 2 : 1, nursery, fromSpace, toSpace);
      reducedTenuredSize = Heap_GetSize(toSpace);
      expandedTenuredSize = reducedToExpanded(reducedTenuredSize, CollectionRate,
					      doAgressive ? 2 : 1);
      Heap_Resize(fromSpace, 0, 1);
      typed_swap(Heap_t *, fromSpace, toSpace);   /* to-space becomes from-space */
      NumMajorGC++;
    }
    /* Swap primary/replica roles for the next collection cycle. */
    typed_swap(int, primaryGlobalOffset, replicaGlobalOffset);
    typed_swap(int, primaryArrayOffset, replicaArrayOffset);
    typed_swap(int, primaryStackletOffset, replicaStackletOffset);
    Heap_Resize(nursery,reducedNurserySize,1);
    NumGC++;
    GCStatus = GCOff;
    if (Heap_GetAvail(fromSpace) < tenuredReserve + Heap_GetSize(nursery)) {
      /* The next GC needs to be a major GC so we must begin allocation
	 in the fromSpace immediately.  We permit allocation to continue
	 so we don't flip on again too soon.  However, allocation is
	 restricted so the major collection is started soon so that an
	 accurate survival rate can be computed. */
      GCType = Major;
      fromSpace->top = fromSpace->cursor + (minOffRequest * NumProc) / sizeof(val_t);
    }
    else
      GCType = nextGCType;
  }

  /* All system threads need to reset their limit pointer */
  ResetAllocation(proc, NULL);
  proc->writelistCursor = proc->writelistStart;

  strongBarrier(barriers,proc);
  establishCopyRange(proc);   /* Called here so that copyRanges are initialized
				 for use in GCRelease */
  if (collectDiag >= 2)
    printf("Proc %d: leaving CollectorOff\n", proc->procid);
}
/* Transition the collector from the "aggressive" phase to fully on:
   stop all mutators, rescan thread roots (discarding the aggressive
   phase's root scan), have the first processor flip all globals, then
   publish local work to the shared stack and set GCStatus to GCOn.
   Barrier-sequenced across all processors; phases must not be reordered. */
static void CollectorTransition(Proc_t *proc)
{
  int isFirst = 0;
  Thread_t *threadIterator = NULL;

  /* Major vs Minor of current GC was determined at end of last GC */
  procChangeState(proc, GCWork, 604);
  proc->segmentType |= (FlipTransition | ((GCType == Major) ? MajorWork : MinorWork));

  switch (GCStatus) {
    case GCAgressive:    /* Signalling to other processors that collector is turning on */
      GCStatus = GCPendingOn;
      StopAllThreads();
      break;
    case GCPendingOn:    /* Responding to signal that collector is turning on */
      break;
    default:
      DIE("CollectorTransition");
  }

  /* Collection cannot proceed until all processors have stopped running
     mutators.  While waiting for the processors, the "first" processor
     begins to do some preliminary work.  This work must be completed
     before any processor begins collection.  As a result, the "first"
     processor is counted twice. */
  isFirst = (weakBarrier(barriers,proc) == 0);
  if (isFirst) {
    ResetJob();   /* Reset counter so all user threads are scanned */
  }
  resetSharedStack(workStack,&proc->work, 0);
  strongBarrier(barriers,proc);

  /* Reset root lists, compute thread-specific roots in parallel,
     determine whether a major GC was explicitly requested. */
  FetchAndAdd(&totalUnused, sizeof(val_t) * (proc->allocLimit - proc->allocCursor));
  assert(SetIsEmpty(&proc->work.roots));
  while ((threadIterator = NextJob()) != NULL) {
    /* Drop the root scan made during the aggressive phase and redo it. */
    discard_root_scan(proc,threadIterator);
    if (threadIterator->used == 0)
      continue;   /* skip unused thread slots */
    initial_root_scan(proc,threadIterator);
    if (threadIterator->requestInfo >= 0)   /* Allocation request */
      FetchAndAdd(&totalRequest, threadIterator->requestInfo);
  }
  strongBarrier(barriers,proc);

  /* The "first" processor is in charge of the globals but must wait
     until all threads are processed before knowing if GC is major.
     The major GC does not take effect until the first minor GC is
     completed. */
  if (isFirst)
    major_global_scan(proc);   /* Always a major_global_scan because we must flip all globals */
  strongBarrier(barriers, proc);

  /* Check local stack empty, prepare copy range, forward all the roots
     (first proc handles backpointers), transfer work from local to
     shared work stack */
  procChangeState(proc, GCWork, 607);
  assert(SetIsEmpty(&proc->work.objs));
  proc->numRoot += SetLength(&proc->work.roots) + SetLength(&proc->work.globals);
  /* Omit popSharedObjStack */
  pushSharedStack(0,workStack, &proc->work);
  GCStatus = GCOn;
  strongBarrier(barriers, proc);
}
/* Turn the concurrent collector on: stop all mutators, reset heap
   bookkeeping on the "first" processor, scan thread and global roots,
   size the spaces for the coming (major or minor) collection, publish
   local work to the shared stack, and set GCStatus to GCOn (or
   GCAgressive when the aggressive mode applies).  Barrier-sequenced
   across all processors; phases must not be reordered. */
static void CollectorOn(Proc_t *proc)
{
  int isFirst = 0;
  Thread_t *threadIterator = NULL;

  /* Major vs Minor of current GC was determined at end of last GC */
  procChangeState(proc, GCWork, 600);
  proc->segmentType |= (FlipOn | ((GCType == Major) ? MajorWork : MinorWork));

  switch (GCStatus) {
    case GCOff:   /* Signalling to other processors that collector is turning on */
      GCStatus = (GCType == Major ? doAgressive : doMinorAgressive)
		   ? GCPendingAgressive : GCPendingOn;
      StopAllThreads();
      break;
    case GCPendingOn:   /* Responding to signal that collector is turning on */
    case GCPendingAgressive:
      break;
    default:
      DIE("CollectorOn");
  }

  /* Collection cannot proceed until all processors have stopped running
     mutators.  While waiting for the processors, the "first" processor
     begins to do some preliminary work.  This work must be completed
     before any processor begins collection.  As a result, the "first"
     processor is counted twice. */
  isFirst = (weakBarrier(barriers,proc) == 0);
  if (isFirst) {
    Heap_ResetFreshPages(proc,nursery);
    if (GCType == Major)
      Heap_ResetFreshPages(proc,fromSpace);
    if (GCType == Minor) {
      /* A minor GC copies nursery survivors into fromSpace, so fromSpace
	 must have room for the entire nursery in the worst case. */
      if (Heap_GetAvail(fromSpace) < Heap_GetSize(nursery)) {
	printf("Warning: fromSpace has less available space than total nursery size.\n"
	       " Probably due to fromSpace pointer array allocation.\n");
	Heap_Resize(fromSpace, Heap_GetSize(fromSpace) + reducedNurserySize, 0);
	assert(Heap_GetAvail(fromSpace) >= Heap_GetSize(nursery));
      }
    }
    /* Reset cycle-wide accumulators updated via FetchAndAdd below. */
    totalUnused = 0;
    totalRequest = 0;
    totalReplicated = 0;
    ResetJob();   /* Reset counter so all user threads are scanned */
  }
  strongBarrier(barriers,proc);

  /* Reset root lists, compute thread-specific roots in parallel,
     determine whether a major GC was explicitly requested. */
  FetchAndAdd(&totalUnused, sizeof(val_t) * (proc->allocLimit - proc->allocCursor));
  assert(SetIsEmpty(&proc->work.roots));
  while ((threadIterator = NextJob()) != NULL) {
    initial_root_scan(proc,threadIterator);
    if (threadIterator->requestInfo >= 0)   /* Allocation request */
      FetchAndAdd(&totalRequest, threadIterator->requestInfo);
  }
  strongBarrier(barriers,proc);

  /* The "first" processor is in charge of the globals but must wait
     until all threads are processed before knowing if GC is major.
     The major GC does not take effect until the first minor GC is
     completed. */
  if (isFirst) {
    paranoid_check_all(nursery, fromSpace, NULL, NULL, largeSpace);
    major_global_scan(proc);   /* Always a major_global_scan because we must flip all globals */
    if (GCType == Major) {
      Heap_Resize(fromSpace,expandedTenuredSize,0);
      Heap_Resize(toSpace,expandedTenuredSize,1);
      gc_large_startCollect();
    }
    else {
      /* Heap_Resize(nursery,expandedNurserySize,0); XXXXX */
      Heap_Resize(nursery,maximumNurserySize,0);   /* Allocating large arrays makes this necessary */
    }
  }
  resetSharedStack(workStack,&proc->work, 1);
  strongBarrier(barriers, proc);

  /* Check local stack empty, prepare copy range, forward all the roots
     (first proc handles backpointers), transfer work from local to
     shared work stack */
  procChangeState(proc, GCWork, 603);
  assert(SetIsEmpty(&proc->work.objs));
  establishCopyRange(proc);
  proc->numRoot += SetLength(&proc->work.roots) + SetLength(&proc->work.globals);
  /* Omit popSharedObjStack */
  pushSharedStack(0,workStack, &proc->work);
  GCStatus = (GCType == Major ? doAgressive : doMinorAgressive) ? GCAgressive : GCOn;
  strongBarrier(barriers, proc);
}
/* Pop a batch of work of every kind from the shared stack into this
   processor's local work sets, inside the pop room (room 0).

   The fetch proceeds in a fixed cascade (stacklets, global locs, root
   locs, gray regions, objects, segments, back objects, back locations).
   After each stage, MakeFraction scales the next stage's request by the
   fraction actually obtained; if that scales to 0 the cascade short-
   circuits to 'done'.  Gray regions and segments are multi-word records,
   so their requests/results are scaled by regionSize/segSize.

   If anything was fetched, this processor registers a shared-work claim
   (lw->hasShared, ss->numLocalStack) that pushSharedStack later retires. */
void popSharedStack(SharedStack_t* ss, LocalWork_t* lw)
{
  /* Per-kind counts of items actually obtained this call. */
  int stackletFetched = 0;
  int globalLocFetched = 0;
  int rootLocFetched = 0;
  int grayRegionFetched = 0;
  int objFetched = 0;
  int segFetched = 0;
  int backLocFetched = 0;
  int backObjFetched = 0;
  /* Per-kind request sizes, seeded from the tunable fetch sizes. */
  int stackletRequest = threadFetchSize;
  int globalLocRequest = globalLocFetchSize;
  int rootLocRequest = rootLocFetchSize;
  int grayRegionRequest = grayRegionFetchSize;
  int objRequest = objFetchSize;
  int segRequest = segFetchSize;
  int backLocRequest = backLocFetchSize;
  int backObjRequest = backObjFetchSize;
  int objSize = SetLength(&ss->work.objs);

  /* Take roughly a 1/NumProc share of the objects, bounded by objFetchSize. */
  objRequest = Min(objFetchSize, 1 + (int) (objSize / (double) NumProc));
  enterRoom(ss->threeRoom,0);   /* room 0 = the pop room */
  stackletFetched = getFromSharedStack(&lw->stacklets, &ss->work.stacklets, stackletRequest);
  globalLocRequest = MakeFraction(globalLocRequest, stackletRequest, stackletFetched);
  if (globalLocRequest == 0)
    goto done;
  globalLocFetched = getFromSharedStack(&lw->globals, &ss->work.globals, globalLocRequest);
  rootLocRequest = MakeFraction(rootLocRequest, globalLocRequest, globalLocFetched);
  if (rootLocRequest == 0)
    goto done;
  rootLocFetched = getFromSharedStack(&lw->roots, &ss->work.roots, rootLocRequest);
  grayRegionRequest = MakeFraction(grayRegionRequest, rootLocRequest, rootLocFetched);
  if(grayRegionRequest == 0)
    goto done;
  /* Gray regions occupy regionSize words each; fetch whole regions. */
  grayRegionFetched = getFromSharedStack(&lw->grayRegion, &ss->work.grayRegion,
					 regionSize * grayRegionRequest) / regionSize;
  objRequest = MakeFraction(objRequest, grayRegionRequest, grayRegionFetched);
  if (objRequest == 0)
    goto done;
  objFetched = getFromSharedStack(&lw->objs, &ss->work.objs, objRequest);
  if ((segRequest = MakeFraction(segRequest, objRequest, objFetched)) == 0)
    goto done;
  /* Segments occupy segSize words each; fetch whole segments. */
  segFetched = getFromSharedStack(&lw->segments, &ss->work.segments,
				  segSize * segRequest) / segSize;
  backObjRequest = MakeFraction(backObjRequest, segRequest, segFetched);
  if (backObjRequest == 0)
    goto done;
  backObjFetched = getFromSharedStack(&lw->backObjs, &ss->work.backObjs,
				      backObjRequest);
  backLocRequest = MakeFraction(backLocRequest, backObjRequest, backObjFetched);
  if (backLocRequest == 0)
    goto done;
  backLocFetched = getFromSharedStack(&lw->backLocs, &ss->work.backLocs, backLocRequest);
done:
  assert(ss->numLocalStack >= 0);
  if (stackletFetched || globalLocFetched || rootLocFetched || grayRegionFetched ||
      objFetched || segFetched || backObjFetched || backLocFetched) {
    /* We now hold shared work locally; claim it so emptiness detection
       in pushSharedStack/exitRoom stays accurate. */
    assert(lw->hasShared == 0);
    lw->hasShared = 1;
    FetchAndAdd(&ss->numLocalStack,1);
  }
  FetchAndAdd(&ss->numPop, 1);   /* statistics: count pops */
  exitRoom(ss->threeRoom);
}