/************************************
 * Function name: Histogram::Histogram()
 * Description: Creates a Histogram object, validates the arguments and
 *   allocates the per-bucket storage.
 * Arguments:
 *   const COUNTER *bucketList - a sorted ascending (non-constant) array
 *     of minimum bucket values (the last index is the maximum value for
 *     the final bucket). Must not be NULL.
 *   const COUNTER _bucketCount - the number of elements in the bucketList
 *     array. Must be greater than 1.
 *   const bool dontInitialize - when true, only the storage is allocated;
 *     initializeBucketList() and clearBuckets() are NOT called here and
 *     are left to the caller (CategoryHistogram's constructor does this).
 * Return value: none (constructor). The process is terminated with
 *   exit ( 1 ) when bucketList is NULL or _bucketCount <= 1.
 * ChangeLog:
 *   Author          Date     Description
 *   --------------- -------- ----------------------------------------
 */
Histogram::Histogram ( const COUNTER * bucketList,
                       const COUNTER _bucketCount,
                       const bool dontInitialize )
{
  /* Basic consistency checks on the histogram buckets: a usable bucket
   * list exists and holds at least two entries. */
  if ( bucketList == NULL || _bucketCount <= 1 )
  {
    exit ( 1 );
  }

  bucketCount = _bucketCount;

  /* Both per-bucket arrays have one slot per bucketList entry. */
  const int slotCount = static_cast<int> ( bucketCount );
  buckets = new COUNTER[slotCount];
  bucketValues = new COUNTER[slotCount];

  /* The caller asked to perform the initialization itself. */
  if ( dontInitialize )
  {
    return;
  }

  /* Copy in the buckets array (initializeBucketList is expected to ensure
   * that the values are monotonically increasing and not constant), then
   * reset the buckets. */
  initializeBucketList ( bucketList );
  clearBuckets();
}
/************************************
 * Function name: CategoryHistogram::CategoryHistogram
 * Description: Category histograms don't care about ordering in the
 *   bucket list. All buckets are of size one.
 * Arguments:
 *   const COUNTER *bucketList - the array of bucket values, forwarded to
 *     the Histogram constructor and to initializeBucketList().
 *   const COUNTER bucketCount_ - the number of elements in the bucketList
 *     array, forwarded to the Histogram constructor.
 * Return value: none (constructor)
 */
CategoryHistogram::CategoryHistogram ( const COUNTER * bucketList,
                                       const COUNTER bucketCount_ )
  /* The base constructor is told to skip its own initialization
   * (dontInitialize = true) so that it can be performed below instead.
   * NOTE(review): presumably so that this class's own
   * initializeBucketList() is the one that runs - confirm. */
  : Histogram ( bucketList, bucketCount_, /* dontInitialize = */ true )
{
  initializeBucketList ( bucketList );
  clearBuckets();
}
bool Scheduler::postProcess ( void ) { /* Post-process all active fronts. */ for(Int p=0; p<numActiveFronts; p++) { /* Get the front from the "active fronts" permutation. */ Int f = afPerm[p]; Front *front = (&frontList[f]); SparseMeta *meta = &(front->sparseMeta); bool isDense = front->isDense(); bool isSparse = front->isSparse(); FrontState state = front->state; FrontState nextState = state; /* The post-processing we do depends on the state: */ switch(state) { /* There's nothing to do if you're waiting to be allocated. */ case ALLOCATE_WAIT: break; /* The only time we stay in ASSEMBLE_S is if we can't get to * adding the task to the work queue in a particular pass. * This happens when we have a ton of other work to do. */ case ASSEMBLE_S: break; /* If we're in CHILD_WAIT, see if all of the children are ready. */ case CHILD_WAIT: { // assert(isSparse); /* If all the children are ready then we can proceed. */ int nc = meta->nc; if(nc == 0) { initializeBucketList(f); nextState = FACTORIZE; } break; } /* If we're in the middle of a factorization: */ case FACTORIZE: // // IsRReadyEarly experimental feature : pulls R from the GPU // // R is computed but the contribution block is not. This // // method is under development and not yet available for // // production use. // if(isSparse && (&bucketLists[f])->IsRReadyEarly()) { // /* If we haven't created the event yet, create it. */ // if(eventFrontDataReady[f] == NULL) { // // Piggyback the synchronization on the next kernel // // launch. // cudaEventCreate(&eventFrontDataReady[f]); // cudaEventRecord(eventFrontDataReady[f], // kernelStreams[activeSet^1]); } // /* We must have created the event on the last kernel // launch so try to pull R off the GPU. */ else { // pullFrontData(f); } } break; // At this point, the R factor is ready to be pulled from the GPU. case FACTORIZE_COMPLETE: { /* If we haven't created the event yet, create it. 
*/ if(eventFrontDataReady[f] == NULL) { // Piggyback the synchronization on the next kernel launch. cudaEventCreate(&eventFrontDataReady[f]); cudaEventRecord(eventFrontDataReady[f], kernelStreams[activeSet^1]); } /* We must have created the event already during factorize, so instead try to pull R off the GPU. */ else { pullFrontData(f); } /* If the front is dense or staged, then we can't assemble into the parent, so just cleanup. */ if(isDense || meta->isStaged) { nextState = CLEANUP; } /* Else we're sparse and not staged so it means we have memory to assemble into the parent. */ else { nextState = PARENT_WAIT; } break; } /* If we're waiting on the parent to be allocated: */ case PARENT_WAIT: { // assert(isSparse); /* Make sure we're trying to pull the R factor off the GPU. */ pullFrontData(f); // If we have a parent, allocate it and proceed to PUSH_ASSEMBLE Int pids = front->pids; if(pids != EMPTY) { activateFront(pids); nextState = PUSH_ASSEMBLE; } /* Else the parent is the dummy, so cleanup and move to done. */ else { nextState = CLEANUP; } break; } /* The only time we stay in PUSH_ASSEMBLE is if we can't get to * adding the task to the work queue in a particular pass. * This happens when we have a ton of other work to do. */ case PUSH_ASSEMBLE: // assert(isSparse); break; /* If we're in CLEANUP then we need to free the front. */ case CLEANUP: { /* If we were able to get the R factor and free the front. */ if(pullFrontData(f) && finishFront(f)) { /* Update the parent's child count. */ Int pid = front->pids; if(pid != EMPTY) (&frontList[pid])->sparseMeta.nc--; /* Move to DONE. */ nextState = DONE; /* Keep track of the # completed. */ numFrontsCompleted++; /* Revisit the same position again since a front was * swapped to the current location. */ p--; } break; } /* This is the done state with nothing to do. 
*/ case DONE: break; } #if 0 if(front->printMe) { printf("[PostProcessing] %g : %d -> %d\n", (double) (front->fidg), state, nextState); // StateNames[state], StateNames[nextState]); debugDumpFront(front); } #endif /* Save the next state back to the frontDescriptor. */ front->state = nextState; } // printf("%2.2f completed.\n", 100 * (double) numCompleted / (double) // numFronts); /* Return whether all the fronts are DONE. */ return (numFronts == numFrontsCompleted); }