/* Mass-weighted mean velocity of every body in the state.
 *
 * For each of the st->nbody entries in st->bodytab, the velocity is scaled
 * by the body's mass and each component is accumulated through KAHAN_ADD,
 * as is the mass itself.
 *
 * Returns a vector whose X/Y/Z components are the accumulated momentum
 * sums divided by the accumulated mass, and whose W component is the
 * accumulated mass. Only the .sum field of each accumulator is used for
 * the result. No check is made for a zero total mass.
 */
mwvector nbCenterOfMom(const NBodyState* st)
{
    mwvector result = ZERO_VECTOR;
    mwvector momentum;
    Kahan totalMass;
    Kahan momSum[3];
    const Body* cur;
    int count = st->nbody;
    int idx = 0;

    memset(momSum, 0, sizeof(momSum));
    CLEAR_KAHAN(totalMass);

    while (idx < count)
    {
        cur = &st->bodytab[idx];

        /* Momentum contribution of this body: velocity scaled by mass. */
        momentum = mw_mulvs(Vel(cur), Mass(cur));

        KAHAN_ADD(totalMass, Mass(cur));
        KAHAN_ADD(momSum[0], momentum.x);
        KAHAN_ADD(momSum[1], momentum.y);
        KAHAN_ADD(momSum[2], momentum.z);

        ++idx;
    }

    /* W carries the total mass alongside the averaged components. */
    W(result) = totalMass.sum;
    X(result) = momSum[0].sum / totalMass.sum;
    Y(result) = momSum[1].sum / totalMass.sum;
    Z(result) = momSum[2].sum / totalMass.sum;

    return result;
}
/* With GPU checkpointing, the real integral holds the total accumulated
   across all resumes, while the temporary fields hold the checkpointed
   sum from the most recent episode.

   Fold the temporary background value and each per-stream temporary value
   into the corresponding real sums, then reset the temporaries to zero.
*/
void addTmpSums(EvaluationState* es)
{
    unsigned int stream;

    /* Background: merge the last episode's value, then clear it. */
    KAHAN_ADD(es->bgSum, es->bgTmp);
    es->bgTmp = 0.0;

    /* Streams: merge every temporary into its running sum. */
    for (stream = 0; stream < es->numberStreams; ++stream)
    {
        KAHAN_ADD(es->streamSums[stream], es->streamTmps[stream]);
    }

    /* Clear all numberStreams temporaries in one pass. */
    memset(es->streamTmps, 0, es->numberStreams * sizeof(es->streamTmps[0]));
}
/* Sum a mapped 2D result buffer over the given integral area.
 *
 * The resource is mapped with mapMWMemRes() and treated as rows of Kahan
 * elements, where consecutive rows start `pitch` elements apart. For each
 * of the ia->mu_steps rows, the first ia->r_steps elements are visited and
 * the .sum field of each is accumulated with KAHAN_ADD (the elements'
 * own .correction fields are not read). The resource is unmapped before
 * returning.
 *
 * Returns the accumulated sum plus its correction term, or NAN if either
 * mapping or unmapping the resource does not report CAL_RESULT_OK.
 */
static real sumResults(MWMemRes* mr, const IntegralArea* ia)
{
    Kahan total = ZERO_KAHAN;
    Kahan* mapped;
    Kahan* row;
    CALuint pitch;
    CALuint mu;
    CALuint r;

    if (mapMWMemRes(mr, (CALvoid**) &mapped, &pitch) != CAL_RESULT_OK)
    {
        return NAN;
    }

    for (mu = 0; mu < ia->mu_steps; ++mu)
    {
        /* Start of this row; rows are pitch elements apart. */
        row = &mapped[mu * pitch];

        for (r = 0; r < ia->r_steps; ++r)
        {
            KAHAN_ADD(total, row[r].sum);
        }
    }

    if (unmapMWMemRes(mr) != CAL_RESULT_OK)
    {
        return NAN;
    }

    return total.sum + total.correction;
}