コード例 #1
0
/*
 * Split the combined (l, b) trig table into two separate arrays, which
 * might be more convenient for CAL: one array of LTrigPair holding
 * lCosBCos / lSinBCos and one array of real holding bSin.
 *
 * The combined table is requested from precalculateLBTrig() with the
 * transpose flag set, and the element for nu step i and mu step j is read
 * from (and written to) index j * nu_steps + i.
 *
 * Both output arrays are allocated here with mwMallocA and handed back
 * through lTrigBCosOut and bTrigOut; they are not freed by this function.
 */
static void getSplitLBTrig(const AstronomyParameters* ap,
                           const IntegralArea* ia,
                           LTrigPair** lTrigBCosOut,
                           real** bTrigOut)
{
    CALboolean transpose = CAL_TRUE;
    CALuint nuIdx, muIdx;
    size_t pos;
    LBTrig* combined;
    const LBTrig* src;
    LTrigPair* pairs;
    real* bSines;

    pairs = (LTrigPair*) mwMallocA(ia->mu_steps * ia->nu_steps * sizeof(LTrigPair));
    bSines = (real*) mwMallocA(ia->mu_steps * ia->nu_steps * sizeof(real));

    combined = precalculateLBTrig(ap, ia, transpose);

    for (nuIdx = 0; nuIdx < ia->nu_steps; ++nuIdx)
    {
        for (muIdx = 0; muIdx < ia->mu_steps; ++muIdx)
        {
            /* Same layout choice for the source table and both outputs */
            if (transpose)
                pos = muIdx * ia->nu_steps + nuIdx;
            else
                pos = nuIdx * ia->mu_steps + muIdx;

            src = &combined[pos];

            pairs[pos].lCosBCos = src->lCosBCos;
            pairs[pos].lSinBCos = src->lSinBCos;
            bSines[pos] = src->bSin;
        }
    }

    /* The combined table was only needed as a source for the copy */
    mwFreeA(combined);

    *bTrigOut = bSines;
    *lTrigBCosOut = pairs;
}
コード例 #2
0
/*
 * Split the combined (l, b) trig table into an array of LTrigPair
 * (lCosBCos, lSinBCos) and an array of bSin values.
 *
 * The table is requested from precalculateLBTrig() without transposition;
 * the element for nu step i and mu step j is at index i * mu_steps + j in
 * the source and in both outputs.
 *
 * The output arrays are allocated here with mwMallocA and returned through
 * lTrigOut and bSinOut; this function does not free them.
 */
static void getSplitLBTrig(const AstronomyParameters* ap,
                           const IntegralArea* ia,
                           LTrigPair** lTrigOut,
                           real** bSinOut)
{
    cl_uint nuIdx, muIdx;
    size_t pos;
    LBTrig* combined;
    const LBTrig* src;
    LTrigPair* pairs;
    real* sines;

    combined = precalculateLBTrig(ap, ia, FALSE);

    pairs = (LTrigPair*) mwMallocA(ia->mu_steps * ia->nu_steps * sizeof(LTrigPair));
    sines = (real*) mwMallocA(ia->mu_steps * ia->nu_steps * sizeof(real));

    for (nuIdx = 0; nuIdx < ia->nu_steps; ++nuIdx)
    {
        for (muIdx = 0; muIdx < ia->mu_steps; ++muIdx)
        {
            pos = nuIdx * ia->mu_steps + muIdx;
            src = &combined[pos];

            pairs[pos].lCosBCos = src->lCosBCos;
            pairs[pos].lSinBCos = src->lSinBCos;
            sines[pos] = src->bSin;
        }
    }

    /* The combined table is no longer needed once it has been split */
    mwFreeA(combined);

    *bSinOut = sines;
    *lTrigOut = pairs;
}
コード例 #3
0
/*
 * Build the StreamGauss tables for `convolve` points.
 *
 * gaussLegendre() is called on the interval [-1, 1] to fill a temporary
 * node array and sg.qgaus_W. Each sg.dx[i] is then set to
 * 3.0 * stdev * node[i]. The temporary node array is released before
 * returning; sg.qgaus_W and sg.dx stay allocated (mwMallocA) and belong to
 * the returned struct.
 */
StreamGauss getStreamGauss(int convolve)
{
    StreamGauss sg;
    real* nodes;
    int k;

    nodes = (real*) mwMallocA(sizeof(real) * convolve);
    sg.qgaus_W = (real*) mwMallocA(sizeof(real) * convolve);

    gaussLegendre(-1.0, 1.0, nodes, sg.qgaus_W, convolve);

    sg.dx = (real*) mwMallocA(sizeof(real) * convolve);

    /* The points are spread using the old single-sided gaussian
     * (stdev = 0.6). With modfit this is a small simplification, but it
     * causes no problems because it is parameter independent. The weights
     * are calculated later based on the two-sided gaussian. */
    k = 0;
    while (k < convolve)
    {
        sg.dx[k] = 3.0 * stdev * nodes[k];
        ++k;
    }

    mwFreeA(nodes);

    return sg;
}
コード例 #4
0
/*
 * Build the StreamGauss tables for `convolve` points.
 *
 * gaussLegendre() is called on the interval [-1, 1] to fill a temporary
 * node array and sg.qgaus_W. Each sg.dx[i] is then set to
 * 3.0 * stdev * node[i]. The temporary node array is released before
 * returning; the arrays stored in the returned struct stay allocated
 * (mwMallocA).
 *
 * When ANDROID is defined, sg.dx_intfp is additionally allocated and filled
 * by converting every sg.dx[i] with fp_to_intfp().
 */
StreamGauss getStreamGauss(const unsigned int convolve)
{
    unsigned int i;
    StreamGauss sg;
    real* qgaus_X;

    qgaus_X = (real*) mwMallocA(sizeof(real) * convolve);
    sg.qgaus_W = (real*) mwMallocA(sizeof(real) * convolve);

    gaussLegendre(-1.0, 1.0, qgaus_X, sg.qgaus_W, convolve);

    sg.dx = (real*) mwMallocA(sizeof(real) * convolve);

    for (i = 0; i < convolve; ++i)
        sg.dx[i] = 3.0 * stdev * qgaus_X[i];

    mwFreeA(qgaus_X);

#ifdef ANDROID
    sg.dx_intfp = (IntFp*) mwMallocA(sizeof(IntFp) * convolve);

    /* Reuse the unsigned index declared above: a second, signed `i` here
     * would shadow it and be compared against the unsigned count. */
    for (i = 0; i < convolve; ++i)
        fp_to_intfp(sg.dx[i], &sg.dx_intfp[i]);
#endif

    return sg;
}
コード例 #5
0
/*
 * Copy the simulation state in oldSt into st.
 *
 * Scalar fields are copied directly. The tree is reset to EMPTY_TREE and
 * only its rsize and structureError are carried over; freeCell is set to
 * NULL. bodytab, acctab and orbitTrace are newly allocated with mwMallocA
 * and filled with memcpy from oldSt.
 *
 * Preconditions (asserted): oldSt->nbody > 0, and st->bodytab and
 * st->acctab are NULL on entry.
 *
 * TODO: Doesn't clone tree or CL stuffs. If st->ci is set, mw_panic() is
 * called because OpenCL state cloning is not implemented.
 * NOTE(review): the check below reads the destination's ci, not
 * oldSt->ci -- confirm this is the intended field.
 */
void cloneNBodyState(NBodyState* st, const NBodyState* oldSt)
{
    static const NBodyTree emptyTree = EMPTY_TREE;
    unsigned int nbody = oldSt->nbody;

    st->tree = emptyTree;
    st->tree.rsize = oldSt->tree.rsize;

    st->freeCell = NULL;

    st->lastCheckpoint = oldSt->lastCheckpoint;
    st->step           = oldSt->step;
    st->nbody          = oldSt->nbody;
    st->effNBody       = oldSt->effNBody;

    st->ignoreResponsive = oldSt->ignoreResponsive;
    st->usesExact = oldSt->usesExact;
    st->usesQuad = oldSt->usesQuad;  /* was terminated by ',' (comma operator) */
    st->dirty = oldSt->dirty;
    st->usesCL = oldSt->usesCL;
    st->reportProgress = oldSt->reportProgress;

    st->treeIncest = oldSt->treeIncest;
    st->tree.structureError = oldSt->tree.structureError;

    assert(nbody > 0);
    assert(st->bodytab == NULL && st->acctab == NULL);

    /* Deep-copy the per-body arrays */
    st->bodytab = (Body*) mwMallocA(nbody * sizeof(Body));
    memcpy(st->bodytab, oldSt->bodytab, nbody * sizeof(Body));

    st->acctab = (mwvector*) mwMallocA(nbody * sizeof(mwvector));
    memcpy(st->acctab, oldSt->acctab, nbody * sizeof(mwvector));

    /* The orbit trace has a fixed number of points */
    st->orbitTrace = (mwvector*) mwMallocA(N_ORBIT_TRACE_POINTS * sizeof(mwvector));
    memcpy(st->orbitTrace, oldSt->orbitTrace, N_ORBIT_TRACE_POINTS * sizeof(mwvector));

    if (st->ci)
    {
        mw_panic("OpenCL NBodyState cloning not implemented\n");

        /*
        st->ci = (CLInfo*) mwCalloc(1, sizeof(CLInfo));
        st->nbb = (NBodyBuffers*) mwCalloc(1, sizeof(NBodyBuffers));

        memcpy(st->ci, oldSt->ci, sizeof(CLInfo));

        clRetainContext(oldSt->ci->clctx);
        clRetainProgram(oldSt->ci->prog);
        clRetainCommandQueue(oldSt->ci->queue);

        // copy buffers
        mwDuplicateBuffer(st->ci, oldSt->nbb.blah)
        */
    }

}
コード例 #6
0
/*
 * Initialise st for a fresh run of `nbody` bodies.
 *
 * The tree is reset to EMPTY_TREE and given ctx->treeRSize, the step
 * counter is zeroed, and the usesQuad / usesExact flags are taken from ctx.
 * `bodies` is stored in st->bodytab as-is (the pointer is kept, the array
 * is not copied). The orbit trace is allocated with every point set to
 * (REAL_MAX, REAL_MAX, REAL_MAX), and acctab is allocated zero-filled.
 */
void setInitialNBodyState(NBodyState* st, const NBodyCtx* ctx, Body* bodies, int nbody)
{
    static const NBodyTree blankTree = EMPTY_TREE;
    static const mwvector unsetPoint = mw_vec(REAL_MAX, REAL_MAX, REAL_MAX);
    mwvector* trace;
    int k;

    /* Tree bookkeeping */
    st->tree = blankTree;
    st->tree.rsize = ctx->treeRSize;
    st->freeCell = NULL;

    /* Flags derived from the context */
    st->usesExact = (ctx->criterion == Exact);
    st->usesQuad = ctx->useQuad;

    /* Bodies and step counter */
    st->bodytab = bodies;
    st->nbody = nbody;
    st->step = 0;

    trace = (mwvector*) mwMallocA(N_ORBIT_TRACE_POINTS * sizeof(mwvector));
    for (k = 0; k < N_ORBIT_TRACE_POINTS; ++k)
        trace[k] = unsetPoint;
    st->orbitTrace = trace;

    /* Tests may step the system from an arbitrary place, so the
     * accelerations must start out zeroed. */
    st->acctab = (mwvector*) mwCallocA(nbody, sizeof(mwvector));
}
コード例 #7
0
LBTrig* precalculateLBTrig(const AstronomyParameters* ap,
                           const IntegralArea* ia,
                           int transpose)
{
    unsigned int i, j, idx;
    LBTrig* lbts;
    NuId nuid;
    LB lb;
    real mu;

    lbts = (LBTrig*) mwMallocA(sizeof(LBTrig) * ia->nu_steps * ia->mu_steps);

    for (i = 0; i < ia->nu_steps; ++i)
    {
        nuid = calcNuStep(ia, i);
        for (j = 0; j < ia->mu_steps; ++j)
        {
            mu = ia->mu_min + (((real) j + 0.5) * ia->mu_step_size);
            lb = gc2lb(ap->wedge, mu, nuid.nu);
            idx = transpose ? j * ia->nu_steps + i : i * ia->mu_steps + j;
            lbts[idx] = lb_trig(lb);
        }
    }

    return lbts;
}
コード例 #8
0
/*
 * Compute the per-stream constants for every stream in `streams`.
 *
 * For each stream: large_sigma is set when sigma lies outside
 * [-SIGMA_LIMIT, SIGMA_LIMIT], sigma_sq2_inv is 1 / (2 * sigma^2), and
 * a / c come from streamA() and streamC().
 *
 * Returns an array of number_streams entries allocated with mwMallocA, or
 * NULL (after printing a message and freeing the array) if any stream has
 * a sigma of exactly 0.0.
 */
StreamConstants* getStreamConstants(const AstronomyParameters* ap, const Streams* streams)
{
    StreamConstants* consts;
    StreamConstants* cur;
    real sigma;
    real twoSigmaSq;
    int k;

    consts = (StreamConstants*) mwMallocA(streams->number_streams * sizeof(StreamConstants));

    for (k = 0; k < streams->number_streams; ++k)
    {
        sigma = streams->parameters[k].sigma;

        /* A zero sigma would divide by zero below, so reject it */
        if (sigma == 0.0)
        {
            mw_printf("stream sigma 0.0 is invalid\n");
            mwFreeA(consts);
            return NULL;
        }

        cur = &consts[k];

        cur->large_sigma = (sigma > SIGMA_LIMIT || sigma < -SIGMA_LIMIT);

        twoSigmaSq = 2.0 * sqr(sigma);
        cur->sigma_sq2_inv = 1.0 / twoSigmaSq;

        cur->a = streamA(&streams->parameters[k]);
        cur->c = streamC(ap,
                         ap->wedge,
                         streams->parameters[k].mu,
                         streams->parameters[k].r);
    }

    return consts;
}
コード例 #9
0
/*
 * Read the global Lua table named AREAS_NAME into the file-level _ias
 * array, storing the entry count in _nCut.
 *
 * The global is pushed, checked to be a table, and each array element
 * 1.._nCut is pushed, parsed with readIntegralArea() and popped again. The
 * table itself is popped before returning, so the Lua stack is left as it
 * was found.
 *
 * Raises a Lua error if the table is empty; otherwise returns 0.
 */
static int evaluateIntegralAreas(lua_State* luaSt)
{
    int areasIdx;
    int k;

    lua_getglobal(luaSt, AREAS_NAME);
    areasIdx = lua_gettop(luaSt);
    mw_lua_checktable(luaSt, areasIdx);

    _nCut = luaL_getn(luaSt, areasIdx);
    if (_nCut == 0)
    {
        lua_pop(luaSt, 1);
        return luaL_error(luaSt, "At least one cut required");
    }

    _ias = mwMallocA(_nCut * sizeof(IntegralArea));

    k = 0;
    while (k < (int) _nCut)
    {
        /* Lua arrays are 1-based */
        lua_rawgeti(luaSt, areasIdx, k + 1);
        readIntegralArea(luaSt, &_ias[k], lua_gettop(luaSt));
        lua_pop(luaSt, 1);
        ++k;
    }

    /* Drop the areas table itself */
    lua_pop(luaSt, 1);
    return 0;
}
コード例 #10
0
/*
 * Read the global Lua table named STREAMS_NAME into the file-level
 * _streams structure.
 *
 * A Lua error is raised if the global fails the expectTable() check. An
 * empty table is accepted: the table is popped and 0 is returned without
 * touching _streams. Otherwise number_streams is set, the parameters array
 * is allocated with mwMallocA, and each element 1..n is pushed, parsed with
 * readStreamTable() and popped. The table is popped before returning.
 *
 * Always returns 0 when it returns normally.
 */
static int evaluateStreams(lua_State* luaSt)
{
    int streamsIdx;
    int count;
    int k;

    lua_getglobal(luaSt, STREAMS_NAME);
    streamsIdx = lua_gettop(luaSt);

    if (expectTable(luaSt, streamsIdx))
        luaL_error(luaSt, "Expected '%s' to be a table", STREAMS_NAME);

    count = luaL_getn(luaSt, streamsIdx);

    /* CHECKME: Is having no streams valid? */
    if (count == 0)
    {
        lua_pop(luaSt, 1);
        return 0;
    }

    _streams->number_streams = count;
    _streams->parameters = mwMallocA(count * sizeof(StreamParameters));

    k = 0;
    while (k < count)
    {
        /* Lua arrays are 1-based */
        lua_rawgeti(luaSt, streamsIdx, k + 1);
        readStreamTable(luaSt, &_streams->parameters[k], lua_gettop(luaSt));
        lua_pop(luaSt, 1);
        ++k;
    }

    /* Drop the streams table itself */
    lua_pop(luaSt, 1);
    return 0;
}
コード例 #11
0
/*
 * Compute the per-stream constants for every stream in `streams`.
 *
 * For each stream: large_sigma is set when sigma lies outside
 * [-SIGMA_LIMIT, SIGMA_LIMIT], sigma_sq2_inv is 1 / (2 * sigma^2), and
 * a / c come from streamA() and streamC().
 *
 * Returns an array of number_streams entries allocated with mwMallocA.
 * Note that sigma is not validated here: a sigma of 0 makes the
 * sigma_sq2_inv computation divide by zero.
 */
StreamConstants* getStreamConstants(const AstronomyParameters* ap, const Streams* streams)

{
    StreamConstants* consts;
    StreamConstants* cur;
    real sigma;
    real twoSigmaSq;
    unsigned int k;

    consts = (StreamConstants*) mwMallocA(sizeof(StreamConstants) * streams->number_streams);

    for (k = 0; k < streams->number_streams; ++k)
    {
        cur = &consts[k];
        sigma = streams->parameters[k].sigma;

        cur->large_sigma = (sigma > SIGMA_LIMIT || sigma < -SIGMA_LIMIT);

        twoSigmaSq = 2.0 * sqr(sigma);
        cur->sigma_sq2_inv = 1.0 / twoSigmaSq;

        cur->a = streamA(&streams->parameters[k]);
        cur->c = streamC(ap,
                         ap->wedge,
                         streams->parameters[k].mu,
                         streams->parameters[k].r);
    }

    return consts;
}
コード例 #12
0
/*
 * Build the table of per-nu-step constants.
 *
 * For step i, with nu_i = nu_min + i * nu_step_size:
 *   id = mw_cos(d2r(90 - nu_i - nu_step_size)) - mw_cos(d2r(90 - nu_i))
 *   nu = nu_i + 0.5 * nu_step_size
 *
 * Returns an array of nu_steps entries allocated with mwMallocA; it is not
 * freed here.
 */
NuConstants* prepareNuConstants(unsigned int nu_steps, real nu_step_size, real nu_min)
{
    NuConstants* table;
    NuConstants* entry;
    real farEdge, nearEdge;
    unsigned int k;

    table = (NuConstants*) mwMallocA(sizeof(NuConstants) * nu_steps);

    for (k = 0; k < nu_steps; ++k)
    {
        entry = &table[k];

        /* Start of the step; shifted by half a step once id is known */
        entry->nu = nu_min + (k * nu_step_size);

        farEdge = d2r(90.0 - entry->nu - nu_step_size);
        nearEdge = d2r(90.0 - entry->nu);

        entry->id = mw_cos(farEdge) - mw_cos(nearEdge);
        entry->nu += 0.5 * nu_step_size;
    }

    return table;
}
コード例 #13
0
/*
 * Run one complete evaluation: calculate the integrals, then the
 * likelihood (and optionally the separation) against the star points file.
 *
 * results            - receives the final integrals (getFinalIntegrals) and
 *                      is passed on to likelihood / checkSeparationResults
 * ap                 - astronomy parameters; convolve, number_streams,
 *                      number_integrals and fast_h_prob are read here
 * ias                - integral areas handed to the integral calculation
 * streams            - stream parameters, passed through to likelihood
 * sc                 - precomputed stream constants
 * star_points_file   - path given to readStarPoints()
 * clr                - request settings passed to the dispatch / GPU setup
 * do_separation      - forwarded to likelihood
 * ignoreCheckpoint   - when nonzero, no checkpoint is resolved, resumed or
 *                      written
 * separation_outfile - forwarded to likelihood
 *
 * Returns 0 on success. Returns 1 if the star points file cannot be read;
 * otherwise the likelihood return code OR-ed with the result of
 * checkSeparationResults().
 *
 * NOTE(review): fail() is defined elsewhere; the code after each fail()
 * call assumes it does not continue normally -- confirm.
 */
int evaluate(SeparationResults* results,
             const AstronomyParameters* ap,
             const IntegralArea* ias,
             const Streams* streams,
             const StreamConstants* sc,
             const char* star_points_file,
             const CLRequest* clr,
             int do_separation,
             int ignoreCheckpoint,
             const char* separation_outfile)
{
    int rc = 0;
    EvaluationState* es;
    StreamGauss sg;
    GPUInfo ci;
    StarPoints sp = EMPTY_STAR_POINTS;
    int useImages = FALSE; /* Only applies to CL version */
#ifdef ANDROID
    /* Only allocated when the IntFp path below is taken */
    StreamConstantsIntFp* sci;
    int armExt = mwDetectARMExt();
#endif
    
    memset(&ci, 0, sizeof(ci));

    probabilityFunctionDispatch(ap, clr);

    es = newEvaluationState(ap);
    sg = getStreamGauss(ap->convolve);

  #if SEPARATION_GRAPHICS
    if (separationInitSharedEvaluationState(es))
        warn("Failed to initialize shared evaluation state\n");
  #endif /* SEPARATION_GRAPHICS */

    /* Unless checkpoints are ignored, resolve the checkpoint file and try
     * to resume from it before any integration work starts. */
    if (!ignoreCheckpoint)
    {
        if (resolveCheckpoint())
            fail("Failed to resolve checkpoint file '%s'\n", CHECKPOINT_FILE);

        if (maybeResume(es))
            fail("Failed to resume checkpoint\n");
    }

    /* GPU backend setup; at most one of the two backends is compiled in */
  #if SEPARATION_OPENCL
    if (setupSeparationCL(&ci, ap, ias, clr, &useImages) != CL_SUCCESS)
        fail("Failed to setup CL\n");
  #elif SEPARATION_CAL
    if (separationCALInit(&ci, clr) != CAL_RESULT_OK)
        fail("Failed to setup CAL\n");
  #endif

#ifdef ANDROID
    /* When the CPU is reported as ARM_CPU_NOVFP and fast_h_prob is set,
     * convert the stream constants to IntFp form. Note that the c
     * components are negated during the conversion. */
    if (armExt==ARM_CPU_NOVFP && ap->fast_h_prob)
    {
        int i=0;
        unsigned int nstreams = ap->number_streams;
        unsigned int convolve = ap->convolve;
        warn("Use IntFp Engine\n");
        sci = (StreamConstantsIntFp*) mwMallocA(sizeof(StreamConstantsIntFp) * ap->number_streams);
        for (i=0; i < nstreams; i++)
        {
            fp_to_intfp(sc[i].a.x,&(sci[i].a[0]));
            fp_to_intfp(sc[i].a.y,&sci[i].a[1]);
            fp_to_intfp(sc[i].a.z,&sci[i].a[2]);
            fp_to_intfp(sc[i].a.w,&sci[i].a[3]);
            fp_to_intfp(-sc[i].c.x,&sci[i].c[0]);
            fp_to_intfp(-sc[i].c.y,&sci[i].c[1]);
            fp_to_intfp(-sc[i].c.z,&sci[i].c[2]);
            fp_to_intfp(-sc[i].c.w,&sci[i].c[3]);
            fp_to_intfp(sc[i].sigma_sq2_inv,&sci[i].sigma_sq2_inv);
        }
    }
#endif
       
    /* Integral calculation: the IntFp variant is used under the same
     * condition that allocated sci above, the regular one otherwise. */
#ifdef ANDROID
    if (armExt == ARM_CPU_NOVFP && ap->fast_h_prob)
        calculateIntegralsIntFp(ap, ias, sci, sg, es, clr, &ci, useImages);
    else
        calculateIntegrals(ap, ias, sc, sg, es, clr, &ci, useImages);
#else
    calculateIntegrals(ap, ias, sc, sg, es, clr, &ci, useImages);
#endif

    if (!ignoreCheckpoint)
    {
        finalCheckpoint(es);
    }

    /* Copy the integrals out before the evaluation state is released */
    getFinalIntegrals(results, es, ap->number_streams, ap->number_integrals);
    freeEvaluationState(es);

    if (readStarPoints(&sp, star_points_file))
    {
        rc = 1;
        warn("Failed to read star points file\n");
    }
    else
    {
        /* TODO: likelihood on GPU with OpenCL. Make this less of a
         * mess. The different versions should appear to be the
         * same. */

      #if SEPARATION_CAL
        /* Both branches currently call the same CPU likelihood; the CAL
         * likelihood call is commented out. */
        if (do_separation)
        {
            /* No separation on GPU */
            rc = likelihood(results, ap, &sp, sc, streams, sg, do_separation, separation_outfile);
        }
        else
        {
            //rc = likelihoodCAL(results, ap, &sp, sc, streams, sg, clr, &ci);
            rc = likelihood(results, ap, &sp, sc, streams, sg, do_separation, separation_outfile);
        }
      #else
#ifdef ANDROID
        if (armExt == ARM_CPU_NOVFP && ap->fast_h_prob)
            rc = likelihood_intfp(results, ap, &sp, sci, streams, sg, do_separation, separation_outfile);
        else
            rc = likelihood(results, ap, &sp, sc, streams, sg, do_separation, separation_outfile);
#else
        rc = likelihood(results, ap, &sp, sc, streams, sg, do_separation, separation_outfile);
#endif
      #endif /* SEPARATION_CAL */

        /* Any nonzero check result is folded into the return code */
        rc |= checkSeparationResults(results, ap->number_streams);
    }

    /* Cleanup runs on both the success and the read-failure path */
    freeStarPoints(&sp);
    freeStreamGauss(sg);

  #if SEPARATION_OPENCL
    mwDestroyCLInfo(&ci);
  #elif SEPARATION_CAL
    mwCALShutdown(&ci);
  #endif

#ifdef ANDROID
    /* sci exists only if the IntFp path was taken */
    if (armExt == ARM_CPU_NOVFP && ap->fast_h_prob)
        mwFreeA(sci);
#endif
    
    return rc;
}