/* Might be more convenient to split l and b stuff for CAL */
/* Splits the table returned by precalculateLBTrig() into two newly
 * allocated arrays: the (lCosBCos, lSinBCos) pairs and the bSin values.
 * The table is requested in transposed layout and both output arrays
 * use the same element order as the table. The temporary table is freed
 * here; the two output arrays are handed to the caller through
 * lTrigBCosOut and bTrigOut and are not freed by this function. */
static void getSplitLBTrig(const AstronomyParameters* ap,
                           const IntegralArea* ia,
                           LTrigPair** lTrigBCosOut,
                           real** bTrigOut)
{
    const CALboolean transpose = CAL_TRUE;
    LTrigPair* pairs;
    real* sines;
    LBTrig* table;
    CALuint nu, mu;
    size_t k;

    pairs = (LTrigPair*) mwMallocA(ia->mu_steps * ia->nu_steps * sizeof(LTrigPair));
    sines = (real*) mwMallocA(ia->mu_steps * ia->nu_steps * sizeof(real));
    table = precalculateLBTrig(ap, ia, transpose);

    for (nu = 0; nu < ia->nu_steps; ++nu)
    {
        for (mu = 0; mu < ia->mu_steps; ++mu)
        {
            /* Same index formula that precalculateLBTrig() uses for the
             * requested layout. */
            if (transpose)
            {
                k = mu * ia->nu_steps + nu;
            }
            else
            {
                k = nu * ia->mu_steps + mu;
            }

            pairs[k].lCosBCos = table[k].lCosBCos;
            pairs[k].lSinBCos = table[k].lSinBCos;
            sines[k] = table[k].bSin;
        }
    }

    mwFreeA(table);

    *lTrigBCosOut = pairs;
    *bTrigOut = sines;
}
/* Splits the (non-transposed) table returned by precalculateLBTrig()
 * into two newly allocated arrays: the (lCosBCos, lSinBCos) pairs and
 * the bSin values, in the same element order as the table. The
 * temporary table is freed here; the two output arrays are handed to
 * the caller through lTrigOut and bSinOut and are not freed by this
 * function. */
static void getSplitLBTrig(const AstronomyParameters* ap,
                           const IntegralArea* ia,
                           LTrigPair** lTrigOut,
                           real** bSinOut)
{
    const size_t count = ia->mu_steps * ia->nu_steps;
    LBTrig* table;
    LTrigPair* pairs;
    real* sines;
    size_t k;

    table = precalculateLBTrig(ap, ia, FALSE);

    pairs = (LTrigPair*) mwMallocA(count * sizeof(LTrigPair));
    sines = (real*) mwMallocA(count * sizeof(real));

    /* The table is row-major (nu outer, mu inner), so walking it
     * linearly touches the entries in the same order as a nested
     * nu/mu loop would. */
    for (k = 0; k < count; ++k)
    {
        pairs[k].lCosBCos = table[k].lCosBCos;
        pairs[k].lSinBCos = table[k].lSinBCos;
        sines[k] = table[k].bSin;
    }

    mwFreeA(table);

    *lTrigOut = pairs;
    *bSinOut = sines;
}
StreamGauss getStreamGauss(int convolve) { int i; StreamGauss sg; real* qgaus_X; qgaus_X = (real*) mwMallocA(sizeof(real) * convolve); sg.qgaus_W = (real*) mwMallocA(sizeof(real) * convolve); gaussLegendre(-1.0, 1.0, qgaus_X, sg.qgaus_W, convolve); sg.dx = (real*) mwMallocA(sizeof(real) * convolve); /*Using old (single-sided gaussian stdev = 0.6) to spread points. This is a small simplification when using modfit, but does not cause any problems since it is parameter independent. The weights will be calculated later based on the two-sided gaussian.*/ for (i = 0; i < convolve; ++i) { sg.dx[i] = 3.0 * stdev * qgaus_X[i]; } mwFreeA(qgaus_X); return sg; }
StreamGauss getStreamGauss(const unsigned int convolve) { unsigned int i; StreamGauss sg; real* qgaus_X; qgaus_X = (real*) mwMallocA(sizeof(real) * convolve); sg.qgaus_W = (real*) mwMallocA(sizeof(real) * convolve); gaussLegendre(-1.0, 1.0, qgaus_X, sg.qgaus_W, convolve); sg.dx = (real*) mwMallocA(sizeof(real) * convolve); for (i = 0; i < convolve; ++i) sg.dx[i] = 3.0 * stdev * qgaus_X[i]; mwFreeA(qgaus_X); #ifdef ANDROID sg.dx_intfp = (IntFp*) mwMallocA(sizeof(IntFp) * convolve); for (int i=0; i < convolve; i++) fp_to_intfp(sg.dx[i],&sg.dx_intfp[i]); #endif return sg; }
/* TODO: Doesn't clone tree or CL stuffs */
/* Copies the scalar state of oldSt into st and gives st its own copies
 * of the body table, the acceleration table and the orbit trace.
 *
 * The tree of st is reset to EMPTY_TREE; only rsize and structureError
 * are carried over from oldSt. st->freeCell is set to NULL.
 *
 * st->bodytab and st->acctab must be NULL on entry and oldSt->nbody
 * must be non-zero (both are asserted). st->orbitTrace is overwritten
 * without being checked. Panics if st->ci is set, since cloning the
 * OpenCL state is not implemented. */
void cloneNBodyState(NBodyState* st, const NBodyState* oldSt)
{
    static const NBodyTree emptyTree = EMPTY_TREE;
    unsigned int nbody = oldSt->nbody;

    st->tree = emptyTree;
    st->tree.rsize = oldSt->tree.rsize;

    st->freeCell = NULL;

    st->lastCheckpoint = oldSt->lastCheckpoint;
    st->step = oldSt->step;
    st->nbody = oldSt->nbody;
    st->effNBody = oldSt->effNBody;

    st->ignoreResponsive = oldSt->ignoreResponsive;
    st->usesExact = oldSt->usesExact;
    st->usesQuad = oldSt->usesQuad;
    st->dirty = oldSt->dirty;
    st->usesCL = oldSt->usesCL;
    st->reportProgress = oldSt->reportProgress;

    st->treeIncest = oldSt->treeIncest;
    st->tree.structureError = oldSt->tree.structureError;

    assert(nbody > 0);
    assert(st->bodytab == NULL && st->acctab == NULL);

    st->bodytab = (Body*) mwMallocA(nbody * sizeof(Body));
    memcpy(st->bodytab, oldSt->bodytab, nbody * sizeof(Body));

    st->acctab = (mwvector*) mwMallocA(nbody * sizeof(mwvector));
    memcpy(st->acctab, oldSt->acctab, nbody * sizeof(mwvector));

    st->orbitTrace = (mwvector*) mwMallocA(N_ORBIT_TRACE_POINTS * sizeof(mwvector));
    memcpy(st->orbitTrace, oldSt->orbitTrace, N_ORBIT_TRACE_POINTS * sizeof(mwvector));

    /* NOTE(review): this tests the destination's ci, not oldSt->ci, so
     * a source state that uses CL is only rejected when st already
     * carries a CLInfo. Confirm whether oldSt->ci was intended. */
    if (st->ci)
    {
        mw_panic("OpenCL NBodyState cloning not implemented\n");

        /* Sketch of the intended implementation:

        st->ci = (CLInfo*) mwCalloc(1, sizeof(CLInfo));
        st->nbb = (NBodyBuffers*) mwCalloc(1, sizeof(NBodyBuffers));

        memcpy(st->ci, oldSt->ci, sizeof(CLInfo));

        clRetainContext(oldSt->ci->clctx);
        clRetainProgram(oldSt->ci->prog);
        clRetainCommandQueue(oldSt->ci->queue);

        // copy buffers
        mwDuplicateBuffer(st->ci, oldSt->nbb.blah)
        */
    }
}
/* Initializes st for a fresh run over `nbody` bodies.
 *
 * The tree is reset to EMPTY_TREE with rsize taken from ctx, the step
 * counter starts at 0, and the usesQuad / usesExact flags are derived
 * from ctx. `bodies` is stored in st->bodytab as-is (not copied).
 * A new orbit trace is allocated with every point set to
 * (REAL_MAX, REAL_MAX, REAL_MAX), and a zeroed acceleration table of
 * nbody entries is allocated. */
void setInitialNBodyState(NBodyState* st, const NBodyCtx* ctx, Body* bodies, int nbody)
{
    static const NBodyTree emptyTree = EMPTY_TREE;
    static const mwvector maxV = mw_vec(REAL_MAX, REAL_MAX, REAL_MAX);
    mwvector* trace;
    int k;

    /* Tree bookkeeping */
    st->tree = emptyTree;
    st->tree.rsize = ctx->treeRSize;
    st->freeCell = NULL;

    /* Settings taken from the context */
    st->usesQuad = ctx->useQuad;
    st->usesExact = (ctx->criterion == Exact);

    st->step = 0;
    st->nbody = nbody;
    st->bodytab = bodies;

    trace = (mwvector*) mwMallocA(N_ORBIT_TRACE_POINTS * sizeof(mwvector));
    st->orbitTrace = trace;
    for (k = 0; k < N_ORBIT_TRACE_POINTS; ++k)
    {
        trace[k] = maxV;
    }

    /* Tests may start stepping the system from an arbitrary point, so
     * the accelerations have to begin zeroed. */
    st->acctab = (mwvector*) mwCallocA(nbody, sizeof(mwvector));
}
LBTrig* precalculateLBTrig(const AstronomyParameters* ap, const IntegralArea* ia, int transpose) { unsigned int i, j, idx; LBTrig* lbts; NuId nuid; LB lb; real mu; lbts = (LBTrig*) mwMallocA(sizeof(LBTrig) * ia->nu_steps * ia->mu_steps); for (i = 0; i < ia->nu_steps; ++i) { nuid = calcNuStep(ia, i); for (j = 0; j < ia->mu_steps; ++j) { mu = ia->mu_min + (((real) j + 0.5) * ia->mu_step_size); lb = gc2lb(ap->wedge, mu, nuid.nu); idx = transpose ? j * ia->nu_steps + i : i * ia->mu_steps + j; lbts[idx] = lb_trig(lb); } } return lbts; }
StreamConstants* getStreamConstants(const AstronomyParameters* ap, const Streams* streams) { int i; StreamConstants* sc; real stream_sigma; real sigma_sq2; sc = (StreamConstants*) mwMallocA(streams->number_streams * sizeof(StreamConstants)); for (i = 0; i < streams->number_streams; ++i) { stream_sigma = streams->parameters[i].sigma; if (stream_sigma == 0.0) { mw_printf("stream sigma 0.0 is invalid\n"); mwFreeA(sc); return NULL; } sc[i].large_sigma = (stream_sigma > SIGMA_LIMIT || stream_sigma < -SIGMA_LIMIT); sigma_sq2 = 2.0 * sqr(stream_sigma); sc[i].sigma_sq2_inv = 1.0 / sigma_sq2; sc[i].a = streamA(&streams->parameters[i]); sc[i].c = streamC(ap, ap->wedge, streams->parameters[i].mu, streams->parameters[i].r); } return sc; }
/* Reads the global Lua table named AREAS_NAME into the file-level
 * _ias array and records its length in _nCut.
 *
 * Raises a Lua error if the table is empty. Otherwise allocates _nCut
 * IntegralArea entries, fills each one with readIntegralArea() from the
 * corresponding (1-based) table element, leaves the Lua stack as it
 * found it, and returns 0. */
static int evaluateIntegralAreas(lua_State* luaSt)
{
    int areasIdx;
    int cut;

    lua_getglobal(luaSt, AREAS_NAME);
    areasIdx = lua_gettop(luaSt);
    mw_lua_checktable(luaSt, areasIdx);

    _nCut = luaL_getn(luaSt, areasIdx);
    if (_nCut == 0)
    {
        lua_pop(luaSt, 1);
        return luaL_error(luaSt, "At least one cut required");
    }

    _ias = mwMallocA(_nCut * sizeof(IntegralArea));

    cut = 0;
    while (cut < (int) _nCut)
    {
        /* Lua arrays are 1-based; the element is pushed, read from the
         * top of the stack, then popped again. */
        lua_rawgeti(luaSt, areasIdx, cut + 1);
        readIntegralArea(luaSt, &_ias[cut], lua_gettop(luaSt));
        lua_pop(luaSt, 1);
        ++cut;
    }

    /* Drop the areas table itself */
    lua_pop(luaSt, 1);

    return 0;
}
/* Reads the global Lua table named STREAMS_NAME into the file-level
 * _streams structure.
 *
 * Raises a Lua error if the global is not a table. An empty table is
 * accepted: the function then returns 0 without touching _streams.
 * Otherwise number_streams is set, a parameters array of that length
 * is allocated, and each entry is filled with readStreamTable() from
 * the corresponding (1-based) table element. The Lua stack is left as
 * it was found. Always returns 0. */
static int evaluateStreams(lua_State* luaSt)
{
    int streamsIdx;
    int count;
    int s;

    lua_getglobal(luaSt, STREAMS_NAME);
    streamsIdx = lua_gettop(luaSt);

    if (expectTable(luaSt, streamsIdx))
    {
        luaL_error(luaSt, "Expected '%s' to be a table", STREAMS_NAME);
    }

    count = luaL_getn(luaSt, streamsIdx);

    /* CHECKME: Is this valid? */
    if (count == 0)
    {
        lua_pop(luaSt, 1);
        return 0;
    }

    _streams->number_streams = count;
    _streams->parameters = mwMallocA(count * sizeof(StreamParameters));

    s = 0;
    while (s < count)
    {
        /* Lua arrays are 1-based; the element is pushed, read from the
         * top of the stack, then popped again. */
        lua_rawgeti(luaSt, streamsIdx, s + 1);
        readStreamTable(luaSt, &_streams->parameters[s], lua_gettop(luaSt));
        lua_pop(luaSt, 1);
        ++s;
    }

    /* Drop the streams table itself */
    lua_pop(luaSt, 1);

    return 0;
}
StreamConstants* getStreamConstants(const AstronomyParameters* ap, const Streams* streams) { unsigned int i; StreamConstants* sc; real stream_sigma; real sigma_sq2; sc = (StreamConstants*) mwMallocA(sizeof(StreamConstants) * streams->number_streams); for (i = 0; i < streams->number_streams; i++) { stream_sigma = streams->parameters[i].sigma; sc[i].large_sigma = (stream_sigma > SIGMA_LIMIT || stream_sigma < -SIGMA_LIMIT); sigma_sq2 = 2.0 * sqr(stream_sigma); sc[i].sigma_sq2_inv = 1.0 / sigma_sq2; sc[i].a = streamA(&streams->parameters[i]); sc[i].c = streamC(ap, ap->wedge, streams->parameters[i].mu, streams->parameters[i].r); } return sc; }
/* Allocates and fills an array of nu_steps NuConstants.
 *
 * For step i the lower edge is nu_min + i * nu_step_size. `id` is
 * cos(d2r(90 - edge - nu_step_size)) - cos(d2r(90 - edge)), and the
 * stored `nu` is the edge moved forward by half a step.
 *
 * The returned array is not freed here. */
NuConstants* prepareNuConstants(unsigned int nu_steps, real nu_step_size, real nu_min)
{
    NuConstants* nu_consts;
    NuConstants* cur;
    real angleUpper, angleLower;
    unsigned int step;

    nu_consts = (NuConstants*) mwMallocA(sizeof(NuConstants) * nu_steps);

    for (step = 0; step < nu_steps; ++step)
    {
        cur = &nu_consts[step];

        /* Start from the lower edge of this step */
        cur->nu = nu_min + (step * nu_step_size);

        angleUpper = d2r(90.0 - cur->nu - nu_step_size);
        angleLower = d2r(90.0 - cur->nu);

        cur->id = mw_cos(angleUpper) - mw_cos(angleLower);

        /* Then shift the stored nu to the middle of the step */
        cur->nu += 0.5 * nu_step_size;
    }

    return nu_consts;
}
int evaluate(SeparationResults* results, const AstronomyParameters* ap, const IntegralArea* ias, const Streams* streams, const StreamConstants* sc, const char* star_points_file, const CLRequest* clr, int do_separation, int ignoreCheckpoint, const char* separation_outfile) { int rc = 0; EvaluationState* es; StreamGauss sg; GPUInfo ci; StarPoints sp = EMPTY_STAR_POINTS; int useImages = FALSE; /* Only applies to CL version */ #ifdef ANDROID StreamConstantsIntFp* sci; int armExt = mwDetectARMExt(); #endif memset(&ci, 0, sizeof(ci)); probabilityFunctionDispatch(ap, clr); es = newEvaluationState(ap); sg = getStreamGauss(ap->convolve); #if SEPARATION_GRAPHICS if (separationInitSharedEvaluationState(es)) warn("Failed to initialize shared evaluation state\n"); #endif /* SEPARATION_GRAPHICS */ if (!ignoreCheckpoint) { if (resolveCheckpoint()) fail("Failed to resolve checkpoint file '%s'\n", CHECKPOINT_FILE); if (maybeResume(es)) fail("Failed to resume checkpoint\n"); } #if SEPARATION_OPENCL if (setupSeparationCL(&ci, ap, ias, clr, &useImages) != CL_SUCCESS) fail("Failed to setup CL\n"); #elif SEPARATION_CAL if (separationCALInit(&ci, clr) != CAL_RESULT_OK) fail("Failed to setup CAL\n"); #endif #ifdef ANDROID if (armExt==ARM_CPU_NOVFP && ap->fast_h_prob) { int i=0; unsigned int nstreams = ap->number_streams; unsigned int convolve = ap->convolve; warn("Use IntFp Engine\n"); sci = (StreamConstantsIntFp*) mwMallocA(sizeof(StreamConstantsIntFp) * ap->number_streams); for (i=0; i < nstreams; i++) { fp_to_intfp(sc[i].a.x,&(sci[i].a[0])); fp_to_intfp(sc[i].a.y,&sci[i].a[1]); fp_to_intfp(sc[i].a.z,&sci[i].a[2]); fp_to_intfp(sc[i].a.w,&sci[i].a[3]); fp_to_intfp(-sc[i].c.x,&sci[i].c[0]); fp_to_intfp(-sc[i].c.y,&sci[i].c[1]); fp_to_intfp(-sc[i].c.z,&sci[i].c[2]); fp_to_intfp(-sc[i].c.w,&sci[i].c[3]); fp_to_intfp(sc[i].sigma_sq2_inv,&sci[i].sigma_sq2_inv); } } #endif #ifdef ANDROID if (armExt == ARM_CPU_NOVFP && ap->fast_h_prob) calculateIntegralsIntFp(ap, ias, sci, sg, es, clr, &ci, 
useImages); else calculateIntegrals(ap, ias, sc, sg, es, clr, &ci, useImages); #else calculateIntegrals(ap, ias, sc, sg, es, clr, &ci, useImages); #endif if (!ignoreCheckpoint) { finalCheckpoint(es); } getFinalIntegrals(results, es, ap->number_streams, ap->number_integrals); freeEvaluationState(es); if (readStarPoints(&sp, star_points_file)) { rc = 1; warn("Failed to read star points file\n"); } else { /* TODO: likelihood on GPU with OpenCL. Make this less of a * mess. The different versions should appear to be the * same. */ #if SEPARATION_CAL if (do_separation) { /* No separation on GPU */ rc = likelihood(results, ap, &sp, sc, streams, sg, do_separation, separation_outfile); } else { //rc = likelihoodCAL(results, ap, &sp, sc, streams, sg, clr, &ci); rc = likelihood(results, ap, &sp, sc, streams, sg, do_separation, separation_outfile); } #else #ifdef ANDROID if (armExt == ARM_CPU_NOVFP && ap->fast_h_prob) rc = likelihood_intfp(results, ap, &sp, sci, streams, sg, do_separation, separation_outfile); else rc = likelihood(results, ap, &sp, sc, streams, sg, do_separation, separation_outfile); #else rc = likelihood(results, ap, &sp, sc, streams, sg, do_separation, separation_outfile); #endif #endif /* SEPARATION_CAL */ rc |= checkSeparationResults(results, ap->number_streams); } freeStarPoints(&sp); freeStreamGauss(sg); #if SEPARATION_OPENCL mwDestroyCLInfo(&ci); #elif SEPARATION_CAL mwCALShutdown(&ci); #endif #ifdef ANDROID if (armExt == ARM_CPU_NOVFP && ap->fast_h_prob) mwFreeA(sci); #endif return rc; }