예제 #1
0
/************************************
 * Function name: Histogram::Histogram()
 * Description:   Creates a Histogram object and validates the arguments.
 *   The process is terminated with exit(1) when bucketList is NULL or when
 *   _bucketCount is 1 or less.
 * Arguments:
 *   const COUNTER *bucketList - per the original notes, a sorted ascending
 *     (non-constant) array of minimum bucket values (the last index is the
 *     maximum value for the final bucket).
 *   const COUNTER _bucketCount - the number of elements in the bucketList
 *     array; must be greater than 1.
 *   const bool dontInitialize - when true, the two arrays are allocated but
 *     initializeBucketList() and clearBuckets() are NOT called here (the
 *     CategoryHistogram constructor uses this and performs both calls
 *     itself).
 * Return value:  a new Histogram class
 * ChangeLog:
 *   Author          Date      Description
 *   --------------- --------  ----------------------------------------
 */
Histogram::Histogram ( const COUNTER * bucketList,
                       const COUNTER   _bucketCount,
                       const bool dontInitialize ) {

  /* Basic consistency checks: a bucket list must be supplied and it must
   * hold more than one entry.
   */
  if ( bucketList == NULL || _bucketCount <= 1 ) {
    exit ( 1 );
  }

  bucketCount = _bucketCount;

  /* Both arrays get one slot per bucket list entry */
  const int slotCount = static_cast<int> ( bucketCount );
  buckets      = new COUNTER[slotCount];
  bucketValues = new COUNTER[slotCount];

  /* The caller asked to take care of initialization itself */
  if ( dontInitialize ) {
    return;
  }

  /* Hand the bucket list to initializeBucketList() (per the original note,
   * this copies the array and ensures the values are monotonically
   * increasing and not constant), then reset the buckets.
   */
  initializeBucketList ( bucketList );
  clearBuckets();
}
예제 #2
0
/************************************
 * Function name: CategoryHistogram::CategoryHistogram
 * Description:   Category histograms don't care about ordering in the
 *  bucket list.  All buckets are of size one.
 *  The Histogram base constructor is asked to skip its own initialization
 *  (third argument true); the bucket list is then initialized and the
 *  buckets are cleared from this constructor body instead.
 * Arguments:
 *   const COUNTER *bucketList - the bucket list, forwarded to the Histogram
 *     constructor and to initializeBucketList().
 *   const COUNTER bucketCount_ - forwarded to the Histogram constructor as
 *     its bucket count.
 * Return value:  a new CategoryHistogram class
 */
CategoryHistogram::CategoryHistogram ( const COUNTER * bucketList,
                                       const COUNTER   bucketCount_ )
  : Histogram ( bucketList, bucketCount_, /* dontInitialize = */ true )
{
  /* Initialization the base constructor was told to skip */
  initializeBucketList ( bucketList );

  clearBuckets();
}
예제 #3
0
bool Scheduler::postProcess
(
    void
)
{
    /* Post-process all active fronts. */
    for(Int p=0; p<numActiveFronts; p++)
    {
        /* Get the front from the "active fronts" permutation. */
        Int f = afPerm[p];

        Front *front = (&frontList[f]);
        SparseMeta *meta = &(front->sparseMeta);
        bool isDense = front->isDense();
        bool isSparse = front->isSparse();
        FrontState state = front->state;
        FrontState nextState = state;

        /* The post-processing we do depends on the state: */
        switch(state)
        {
            /* There's nothing to do if you're waiting to be allocated. */
            case ALLOCATE_WAIT:
                break;

            /* The only time we stay in ASSEMBLE_S is if we can't get to
             * adding the task to the work queue in a particular pass.
             * This happens when we have a ton of other work to do. */
            case ASSEMBLE_S: break;

            /* If we're in CHILD_WAIT, see if all of the children are ready. */
            case CHILD_WAIT:
            {
                // assert(isSparse);

                /* If all the children are ready then we can proceed. */
                int nc = meta->nc;
                if(nc == 0)
                {
                    initializeBucketList(f);
                    nextState = FACTORIZE;
                }
                break;
            }

            /* If we're in the middle of a factorization: */
            case FACTORIZE:

//              // IsRReadyEarly experimental feature : pulls R from the GPU
//              // R is computed but the contribution block is not.  This
//              // method is under development and not yet available for
//              // production use.
//              if(isSparse && (&bucketLists[f])->IsRReadyEarly()) {
//                  /* If we haven't created the event yet, create it. */
//                  if(eventFrontDataReady[f] == NULL) {
//                      // Piggyback the synchronization on the next kernel
//                      // launch.
//                      cudaEventCreate(&eventFrontDataReady[f]);
//                      cudaEventRecord(eventFrontDataReady[f],
//                      kernelStreams[activeSet^1]); }
//                  /* We must have created the event on the last kernel
//                     launch so try to pull R off the GPU. */ else {
//                     pullFrontData(f); } }

                break;

            // At this point, the R factor is ready to be pulled from the GPU.
            case FACTORIZE_COMPLETE:
            {
                /* If we haven't created the event yet, create it. */
                if(eventFrontDataReady[f] == NULL)
                {
                    // Piggyback the synchronization on the next kernel launch.
                    cudaEventCreate(&eventFrontDataReady[f]);
                    cudaEventRecord(eventFrontDataReady[f],
                        kernelStreams[activeSet^1]);
                }
                /* We must have created the event already during factorize,
                   so instead try to pull R off the GPU. */
                else
                {
                    pullFrontData(f);
                }

                /* If the front is dense or staged, then we can't assemble
                   into the parent, so just cleanup. */
                if(isDense || meta->isStaged)
                {
                    nextState = CLEANUP;
                }
                /* Else we're sparse and not staged so it means we have memory
                   to assemble into the parent. */
                else
                {
                    nextState = PARENT_WAIT;
                }
                break;
            }

            /* If we're waiting on the parent to be allocated: */
            case PARENT_WAIT:
            {
                // assert(isSparse);

                /* Make sure we're trying to pull the R factor off the GPU. */
                pullFrontData(f);

                // If we have a parent, allocate it and proceed to PUSH_ASSEMBLE
                Int pids = front->pids;
                if(pids != EMPTY)
                {
                    activateFront(pids);
                    nextState = PUSH_ASSEMBLE;
                }
                /* Else the parent is the dummy, so cleanup and move to done. */
                else
                {
                    nextState = CLEANUP;
                }

                break;
            }

            /* The only time we stay in PUSH_ASSEMBLE is if we can't get to
             * adding the task to the work queue in a particular pass.
             * This happens when we have a ton of other work to do. */
            case PUSH_ASSEMBLE:
                // assert(isSparse);
                break;

            /* If we're in CLEANUP then we need to free the front. */
            case CLEANUP:
            {
                /* If we were able to get the R factor and free the front. */
                if(pullFrontData(f) && finishFront(f))
                {
                    /* Update the parent's child count. */
                    Int pid = front->pids;
                    if(pid != EMPTY) (&frontList[pid])->sparseMeta.nc--;

                    /* Move to DONE. */
                    nextState = DONE;

                    /* Keep track of the # completed. */
                    numFrontsCompleted++;

                    /* Revisit the same position again since a front was
                     * swapped to the current location. */
                    p--;
                }
                break;
            }

            /* This is the done state with nothing to do. */
            case DONE:
                break;
        }

#if 0
        if(front->printMe)
        {
            printf("[PostProcessing] %g : %d -> %d\n", (double) (front->fidg),
                state, nextState);
                // StateNames[state], StateNames[nextState]);
            debugDumpFront(front);
        }
#endif

        /* Save the next state back to the frontDescriptor. */
        front->state = nextState;
    }

    // printf("%2.2f completed.\n", 100 * (double) numCompleted / (double)
    // numFronts);

    /* Return whether all the fronts are DONE. */
    return (numFronts == numFrontsCompleted);
}