/* Estimate the throughput of a device in GFLOP/s.
 *
 * GPUs from NVIDIA and AMD are handed to their vendor-specific
 * estimators. A GPU from any other vendor is reported and assigned a
 * placeholder of 100.0. Any non-GPU device type is reported and
 * assigned 1.0.
 */
cl_double mwDeviceEstimateGFLOPs(const DevInfo* di, cl_bool useDouble)
{
    if (di->devType != CL_DEVICE_TYPE_GPU)
    {
        mw_printf("Missing flops estimate for device type %s\n", showCLDeviceType(di->devType));
        return 1.0;
    }

    if (mwIsNvidiaGPUDevice(di))
    {
        return mwCUDAEstimateGFLOPs(di, useDouble);
    }

    if (mwIsAMDGPUDevice(di))
    {
        return mwAMDEstimateGFLOPs(di, useDouble);
    }

    /* A GPU, but not from a vendor we know how to estimate */
    mw_printf("Unhandled GPU vendor '%s' (0x%x)\n", di->vendor, di->vendorID);
    return 100.0;
}
/* Prepare OpenCL for the device described by the request.
 *
 * Steps, in order: look up the device (mwGetCLInfo), fill in ci->di
 * with its properties (mwGetDevInfo), print a description of it when
 * mwIsFirstRun() says so, and finally create the context and command
 * queue. The first failing step's error code is returned.
 */
cl_int mwSetupCL(CLInfo* ci, const CLRequest* clr)
{
    cl_int rc = mwGetCLInfo(ci, clr);

    if (rc != CL_SUCCESS)
    {
        mw_printf("Failed to get information about device\n");
        return rc;
    }

    rc = mwGetDevInfo(&ci->di, ci->dev);
    if (rc != CL_SUCCESS)
    {
        mw_printf("Failed to get device info\n");
        return rc;
    }

    if (mwIsFirstRun())
    {
        /* Full dump only when asked for, otherwise the summary */
        if (!clr->verbose)
        {
            mwPrintDevInfoShort(&ci->di);
        }
        else
        {
            mwPrintDevInfo(&ci->di);
        }
    }

    return mwCreateCtxQueue(ci, CL_FALSE, clr->enableProfiling);
}
/* Record that tree incest happened and warn about it at most once.
 *
 * The first call sets st->treeIncest; later calls do nothing. The
 * message is suppressed when ctx->quietErrors is set, and its wording
 * depends on whether incest is allowed (ctx->allowIncest) or fatal.
 */
void nbReportTreeIncest(const NBodyCtx* ctx, NBodyState* st)
{
    double percentDone;

    if (st->treeIncest)
    {
        return;   /* already reported once */
    }

    st->treeIncest = TRUE;

    if (ctx->quietErrors)
    {
        return;   /* tests that provoke incest would otherwise flood the output */
    }

    percentDone = 100.0 * (real) st->step / (real) ctx->nStep;

    if (!ctx->allowIncest)
    {
        mw_printf("tree-incest detected (fatal) at step %u / %u (%f%%)\n",
                  st->step, ctx->nStep, percentDone);
    }
    else
    {
        mw_printf("[tree-incest detected at step %u / %u (%f%%)]\n",
                  st->step, ctx->nStep, percentDone);
    }
}
/* Load the astronomy, background and stream parameters named by sf.
 *
 * setupSeparation() is tried first; if it yields nothing, the same
 * file is re-read with readParameters() (the old format). Forwarded
 * command line arguments, if any, are then applied with
 * setParameters().
 *
 * Returns the integral areas, or NULL on failure. When applying the
 * forwarded arguments fails, the areas and streams are released
 * before returning.
 */
static IntegralArea* prepareParameters(const SeparationFlags* sf,
                                       AstronomyParameters* ap,
                                       BackgroundParameters* bgp,
                                       Streams* streams)
{
    IntegralArea* areas = setupSeparation(ap, bgp, streams, sf);

    if (!areas)
    {
        /* New-style read failed; fall back to the old reader */
        mw_printf("Error reading astronomy parameters from file '%s'\n"
                  "  Trying old parameters file\n", sf->ap_file);
        areas = readParameters(sf->ap_file, ap, bgp, streams);

        if (!areas)
        {
            mw_printf("Failed to read parameters file\n");
            return NULL;
        }
    }

    if (!sf->numArgs)
    {
        return areas;   /* nothing forwarded on the command line */
    }

    if (setParameters(ap, bgp, streams, sf->numArgs, sf->nForwardedArgs))
    {
        mwFreeA(areas);
        freeStreams(streams);
        return NULL;
    }

    return areas;
}
/* Compare two bodies field by field.
 *
 * Mass, type, position and velocity are checked in that order. The
 * first field that differs is reported and FALSE is returned; TRUE is
 * returned only when all four match.
 */
int equalBody(const Body* a, const Body* b)
{
    if (Mass(a) != Mass(b))
    {
        mw_printf("mass differ\n");
        return FALSE;
    }
    if (Type(a) != Type(b))
    {
        mw_printf("type differ\n");
        return FALSE;
    }
    if (!equalVector(&Pos(a), &Pos(b)))
    {
        mw_printf("pos differ\n");
        return FALSE;
    }
    if (!equalVector(&Vel(a), &Vel(b)))
    {
        mw_printf("velocity differ\n");
        return FALSE;
    }

    return TRUE;
}
/* Try to claim the shared scene for this process.
 *
 * A compare-and-swap on scene->attachedLock expects 0 and stores our
 * PID; the value it returns is treated as the PID of the previous
 * holder (0 meaning there was none).
 *
 * Returns 0 when the caller may proceed, 1 when another process that
 * is still alive already holds the scene.
 */
static int nbglGetExclusiveSceneAccess(scene_t* scene)
{
    int pid = (int) getpid();
    int oldPID = OPA_cas_int(&scene->attachedLock, 0, pid);
    if (oldPID != 0)
    {
        /* Somebody else's PID was in the lock, so the swap did not store ours */
        if (mwProcessIsAlive(oldPID))
        {
            mw_printf("Could not get exclusive access to simulation shared segment "
                      "(Owned by process %d)\n",
                      oldPID);
            return 1;
        }
        else
        {
            mw_printf("Simulation shared segment owned by dead process %d, stealing it\n",
                      oldPID);

            /* Process is dead, steal the lock */
            /* NOTE(review): this path releases the locks but does not visibly
             * store our PID afterwards - confirm nbglReleaseSceneLocks() or
             * the caller establishes ownership. */
            nbglReleaseSceneLocks(scene);
            return 0;
        }
    }
    else
    {
        /* The swap succeeded: attachedLock now holds our PID */
        /* NOTE(review): attachedPID is reset to 0 here while attachedLock
         * carries the PID - presumably these fields have distinct roles;
         * verify against the scene_t definition. */
        OPA_store_int(&scene->attachedPID, 0);
        return 0;
    }
}
/* Print the application's version line.
 *
 * boincTag: when non-zero, wrap the version in a <search_application>
 *           element; otherwise print it plainly, followed by "BOINC"
 *           if this is a BOINC build.
 * verbose:  when non-zero, also print the git commit id.
 */
static void nbPrintVersion(int boincTag, int verbose)
{
    char description[2048];

    snprintf(description, sizeof(description),
             "%s %u.%02u %s %s %s %s %s, %s",
             NBODY_PROJECT_NAME,
             NBODY_VERSION_MAJOR, NBODY_VERSION_MINOR,
             MILKYWAY_SYSTEM_NAME,
             ARCH_STRING,
             PRECSTRING,
             DENORMAL_STRING,
             NBODY_EXTRAVER,
             NBODY_EXTRALIB);

    if (!boincTag)
    {
        mw_printf("%s %s\n",
                  description,
                  BOINC_APPLICATION ? "BOINC" : "");
    }
    else
    {
        mw_printf("<search_application> %s </search_application>\n", description);
    }

    if (verbose)
    {
        mw_printf("Commit %s\n", MILKYWAY_GIT_COMMIT_ID);
    }
}
/* Estimate the peak GFLOP/s of an AMD GPU.
 *
 * Single precision is taken as 2 flops per ALU per clock:
 *   2 * compute units * ALUs per unit * clock (MHz) * 1e6
 * Double precision is that figure divided by di->doubleFrac.
 *
 * Returns the whole-number GFLOP/s for the requested precision, or
 * 100.0 if the computed figure is not above 100 (e.g. because the
 * driver reported a zero clock or a zero doubleFrac).
 */
cl_double mwAMDEstimateGFLOPs(const DevInfo* di, cl_bool useDouble)
{
    cl_ulong flops, flopsFloat, flopsDouble;
    cl_double gflops;

    /* Widen before multiplying so the product is formed in 64 bits */
    flopsFloat = (cl_ulong) 2 * di->maxCompUnits * di->aluPerCU * di->clockFreq * 1000000;

    /* A zero fraction would divide by zero; treat it as "no estimate"
     * and let the low-value fallback below take over. */
    flopsDouble = (di->doubleFrac > 0) ? (flopsFloat / di->doubleFrac) : 0;

    mw_printf("Estimated AMD GPU GFLOP/s: %.0f SP GFLOP/s, %.0f DP GFLOP/s\n",
              1.0e-9 * (cl_double) flopsFloat,
              1.0e-9 * (cl_double) flopsDouble);

    flops = useDouble ? flopsDouble : flopsFloat;

    gflops = floor(1.0e-9 * (cl_double) flops);

    /* At different times the AMD drivers have reported 0 as the clock
     * speed, so try to catch that. We could test the GPU and figure
     * out what the FLOPs should be to get a better estimate.
     */
    if (gflops <= 100.0)
    {
        mw_printf("Warning: Bizarrely low flops (%.0f). Defaulting to %.0f\n", gflops, 100.0);
        gflops = 100.0;
    }

    return gflops;
}
/* Choose how many OpenMP threads to use.
 *
 * numThreads: requested thread count; a value <= 0 means "not given",
 *             in which case the BOINC-provided CPU count is used if it
 *             is positive. If neither supplies a positive count the
 *             OpenMP default is left alone.
 *
 * Returns 1 if OpenMP reports a nonsensical processor count, otherwise
 * 0. Without OpenMP this does nothing and returns 0.
 */
static int nbSetNumThreads(int numThreads)
{
  #ifdef _OPENMP
    int nProc = omp_get_num_procs();
    int nBoinc = mwGetBoincNumCPU();

    if (nProc <= 0) /* It's happened before... */
    {
        mw_printf("Number of processors %d is crazy\n", nProc);
        return 1;
    }

    /* If command line argument not given, and BOINC gives us a value use that */
    if (numThreads <= 0 && nBoinc > 0)
    {
        numThreads = nBoinc;
    }

    /* omp_set_num_threads() requires a positive count, so a negative
     * request with no BOINC value must not be passed through. */
    if (numThreads > 0)
    {
        omp_set_num_threads(numThreads);
        mw_printf("Using OpenMP %d max threads on a system with %d processors\n",
                  omp_get_max_threads(),
                  nProc);
    }
  #else
    (void) numThreads;
  #endif

    return 0;
}
示例#10
0
/* Start the simulation state either from a checkpoint or from scratch.
 *
 * The checkpoint path is resolved first. A fresh run is set up from
 * the input file when checkpoints are being ignored or no resolved
 * checkpoint exists; otherwise the checkpoint is read back. When the
 * context uses a custom Lua potential, the per-thread evaluation
 * states are opened afterwards in both cases.
 *
 * Returns NBODY_SUCCESS, or the error status of the step that failed.
 */
static NBodyStatus nbResumeOrNewRun(NBodyCtx* ctx, NBodyState* st, const NBodyFlags* nbf)
{
    int startFresh;

    if (nbResolveCheckpoint(st, nbf->checkpointFileName))
    {
        mw_printf("Failed to resolve checkpoint\n");
        return NBODY_ERROR;
    }

    /* Use the checkpoint only if it exists and we were not told to ignore it */
    startFresh = nbf->ignoreCheckpoint || !nbResolvedCheckpointExists(st);

    if (!startFresh)
    {
        /* Resume from checkpoint */
        if (nbf->inputFile && !BOINC_APPLICATION)
        {
            mw_printf("Warning: input file '%s' unused\n", nbf->inputFile);
        }

        if (nbReadCheckpoint(ctx, st))
        {
            mw_report("Failed to read checkpoint\n");
            return NBODY_CHECKPOINT_ERROR;
        }

        mw_report("Resumed from checkpoint '%s'\n", nbf->checkpointFileName);
    }
    else
    {
        if (!nbf->inputFile)
        {
            mw_printf("No input file and no checkpoint\n");
            return NBODY_USER_ERROR;
        }

        if (nbSetup(ctx, st, nbf))
        {
            mw_printf("Failed to read input parameters file\n");
            return NBODY_PARAM_FILE_ERROR;
        }
    }

    /* A custom potential means the script is reevaluated, once per thread */
    if (   ctx->potentialType == EXTERNAL_POTENTIAL_CUSTOM_LUA
        && nbOpenPotentialEvalStatePerThread(st, nbf))
    {
        return NBODY_PARAM_FILE_ERROR;
    }

    return NBODY_SUCCESS;
}
/* Run one separation evaluation from start to finish.
 *
 * Reads the parameters, derives the astronomy parameters and stream
 * constants, evaluates the likelihood and prints the results. The
 * results are printed even when the evaluation reports failure.
 *
 * Returns 1 if any setup step fails, otherwise the return code of
 * evaluate().
 */
static int worker(const SeparationFlags* sf)
{
    AstronomyParameters ap;
    BackgroundParameters bgp = EMPTY_BACKGROUND_PARAMETERS;
    Streams streams = EMPTY_STREAMS;
    CLRequest clr;
    IntegralArea* areas;
    StreamConstants* consts;
    SeparationResults* results;
    int status;

    memset(&ap, 0, sizeof(ap));
    memset(&clr, 0, sizeof(clr));

    setCLReqFlags(&clr, sf);

    areas = prepareParameters(sf, &ap, &bgp, &streams);
    if (!areas)
    {
        return 1;
    }

    if (setAstronomyParameters(&ap, &bgp))
    {
        mwFreeA(areas);
        freeStreams(&streams);
        return 1;
    }

    setExpStreamWeights(&ap, &streams);

    consts = getStreamConstants(&ap, &streams);
    if (!consts)
    {
        mw_printf("Failed to get stream constants\n");
        mwFreeA(areas);
        freeStreams(&streams);
        return 1;
    }

    results = newSeparationResults(ap.number_streams);

    status = evaluate(results, &ap, areas, &streams, consts, sf->star_points_file,
                      &clr, sf->do_separation, sf->ignoreCheckpoint, sf->separation_outfile);
    if (status)
    {
        mw_printf("Failed to calculate likelihood\n");
    }

    printSeparationResults(results, ap.number_streams);

    /* Release everything acquired above */
    mwFreeA(areas);
    mwFreeA(consts);
    freeStreams(&streams);
    freeSeparationResults(results);

    return status;
}
/* Run the remaining nu steps of one integral on the device.
 *
 * Starts at es->nu_step and continues to ia->nu_steps,
 * checkpointing first whenever checkpointing is enabled and due. Each
 * step is timed and reported. A failing nu step returns immediately;
 * a failing checkpoint ends the loop and its error is returned after
 * the timing summary. On success the final kernel results are read
 * back and folded into the running totals.
 */
static cl_int runIntegral(CLInfo* ci,
                          SeparationCLMem* cm,
                          RunSizes* runSizes,
                          EvaluationState* es,
                          const CLRequest* clr,
                          const AstronomyParameters* ap,
                          const IntegralArea* ia)
{
    cl_int err = CL_SUCCESS;
    double stepStart, stepTime;
    double totalTime = 0.0;

    while (es->nu_step < ia->nu_steps)
    {
        if (clr->enableCheckpointing && timeToCheckpointGPU(es, ia))
        {
            err = checkpointCL(ci, cm, ia, es);
            if (err != CL_SUCCESS)
            {
                break;
            }
        }

        stepStart = mwGetTimeMilli();
        err = runNuStep(ci, ia, runSizes, es->nu_step);
        if (err != CL_SUCCESS)
        {
            mwPerrorCL(err, "Failed to run nu step");
            return err;
        }
        stepTime = mwGetTimeMilli() - stepStart;

        totalTime += stepTime;

        reportProgress(ap, ia, es, es->nu_step + 1, stepTime);

        es->nu_step++;
    }

    es->nu_step = 0;

    mw_printf("Integration time: %f s. Average time per iteration = %f ms\n",
              totalTime / 1000.0, totalTime / (double) ia->nu_steps);

    if (err != CL_SUCCESS)
    {
        return err;
    }

    err = readKernelResults(ci, cm, es, ia);
    if (err != CL_SUCCESS)
    {
        mw_printf("Failed to read final kernel results\n");
    }

    /* Add final episode to running totals */
    addTmpCheckpointSums(es);

    return err;
}
/* Fill in every flag the command line left unset, using the project
 * preferences and BOINC's hints.
 *
 * The ordering is awkward by necessity: boinc_init has to happen
 * before preferences can be read, yet it must wait until the arguments
 * are parsed (output redirection may be disabled there), and the
 * command line must still win over both the project preferences and
 * the device named in app_init_data.
 */
static void setFlagsFromPreferences(SeparationFlags* flags, const SeparationPrefs* prefs, const char* progName)
{
    if (flags->useDevNumber < 0)
    {
        /* Prefer the device BOINC suggests through app_init_data;
           if there is no suggestion, fall back to the first device. */
        flags->useDevNumber = mwGetBoincOpenCLDeviceIndex();
        if (flags->useDevNumber < 0)
        {
            flags->useDevNumber = 0;
        }
    }

    if (!flags->preferredPlatformVendor)
    {
        const char* vendor = mwGetBoincOpenCLPlatformVendor();

        if (!vendor)
        {
            /* BOINC gave no hint, so guess from the name of the binary */
            vendor = mwGuessPreferredPlatform(progName);
            if (vendor)
            {
                mw_printf("Guessing preferred OpenCL vendor '%s'\n", vendor);
            }
        }
        else
        {
            mw_printf("BOINC GPU type suggests using OpenCL vendor '%s'\n", vendor);
        }

        if (vendor)
        {
            flags->preferredPlatformVendor = strdup(vendor);
        }
        else
        {
            flags->preferredPlatformVendor = NULL;
        }
    }

    if (flags->targetFrequency <= 0.0)
    {
        flags->targetFrequency = prefs->gpuTargetFrequency;
    }

    if (flags->nonResponsive < 0)
    {
        flags->nonResponsive = prefs->gpuNonResponsive;
    }

    /* For GPU versions, default to using a higher process priority if not set */
    if (   flags->processPriority == MW_PRIORITY_INVALID
        && SEPARATION_OPENCL
        && !flags->forceNoOpenCL)
    {
        flags->processPriority = prefs->gpuProcessPriority;
    }
}
/* Read one integral area from the Lua table at stack index `table`.
 *
 * The bounds are read straight into a scratch IntegralArea; the step
 * counts are read as numbers and then truncated to unsigned int. The
 * scratch area and the step temporaries are static because the
 * argument table stores their addresses in a static initializer.
 *
 * Returns 0 and writes *iaOut on success. Returns 1 (leaving *iaOut
 * untouched) if any step count is 0 or if nu * mu * r would overflow
 * 64 bits.
 */
static int readIntegralArea(lua_State* luaSt, IntegralArea* iaOut, int table)
{
    uint64_t r, mu, nu;
    static IntegralArea ia;
    static real nuStepsf, muStepsf, rStepsf;
    static const MWNamedArg iaArgTable[] =
        {
            { "nu_min",   LUA_TNUMBER, NULL, TRUE, &ia.nu_min },
            { "nu_max",   LUA_TNUMBER, NULL, TRUE, &ia.nu_max },
            { "nu_steps", LUA_TNUMBER, NULL, TRUE, &nuStepsf  },

            { "mu_min",   LUA_TNUMBER, NULL, TRUE, &ia.mu_min },
            { "mu_max",   LUA_TNUMBER, NULL, TRUE, &ia.mu_max },
            { "mu_steps", LUA_TNUMBER, NULL, TRUE, &muStepsf  },

            { "r_min",    LUA_TNUMBER, NULL, TRUE, &ia.r_min  },
            { "r_max",    LUA_TNUMBER, NULL, TRUE, &ia.r_max  },
            { "r_steps",  LUA_TNUMBER, NULL, TRUE, &rStepsf   },
            END_MW_NAMED_ARG
        };

    handleNamedArgumentTable(luaSt, iaArgTable, table);

    ia.nu_steps = (unsigned int) nuStepsf;
    ia.mu_steps = (unsigned int) muStepsf;
    ia.r_steps = (unsigned int) rStepsf;

    /* 64-bit copies, used only for the overflow test below */
    r = (uint64_t) ia.r_steps;
    mu = (uint64_t) ia.mu_steps;
    nu = (uint64_t) ia.nu_steps;

    if (nu == 0 || mu == 0 || r == 0)
    {
        /* Print the unsigned int fields: %u must not be given a uint64_t */
        mw_printf("Integral size { %u, %u, %u } cannot be 0\n",
                  ia.nu_steps, ia.mu_steps, ia.r_steps);
        return 1;
    }

    if ((r > UINT64_MAX / mu) || ((r * mu) > UINT64_MAX / nu))
    {
        mw_printf("Integral size { %u, %u, %u } will overflow progress calculation\n",
                  ia.nu_steps, ia.mu_steps, ia.r_steps);
        return 1;
    }

    calcIntegralStepSizes(&ia);

    *iaOut = ia;

    return 0;
}
示例#15
0
/* Iteratively improve the current EMD solution.
 *
 * Nothing is done when either side has a single element. Otherwise up
 * to MAX_ITERATIONS - 1 rounds are run; each one finds the basic
 * variables, tests for optimality and, if not yet optimal, moves to a
 * new solution. The loop stops early when finding the basic variables
 * fails or when the solution is optimal to within eps.
 *
 * Returns 0 normally, 1 when the optimality test yields EMD_INF or a
 * new solution cannot be produced.
 */
static int emdIterateSolution(EMDState* state)
{
    float eps = EMD_EPS * state->max_cost;
    int itr;

    /* if ssize = 1 or dsize = 1 then we are done, else ... */
    if (state->ssize <= 1 || state->dsize <= 1)
    {
        return 0;
    }

    for (itr = 1; itr < MAX_ITERATIONS; itr++)
    {
        int result;
        float min_delta;

        /* find basic variables */
        result = emdFindBasicVariables(state->cost, state->is_x,
                                       state->u, state->v, state->ssize, state->dsize);
        if (result < 0)
        {
            break;
        }

        /* check for optimality */
        min_delta = emdIsOptimal(state->cost, state->is_x,
                                 state->u, state->v,
                                 state->ssize, state->dsize, state->enter_x);

        if (min_delta == EMD_INF)
        {
            mw_printf("Iteration didn't converge\n");
            return 1;
        }

        /* if no negative deltamin, we found the optimal solution */
        if (min_delta >= -eps)
        {
            break;
        }

        /* improve solution */
        if (!emdNewSolution(state))
        {
            mw_printf("Iteration didn't converge\n");
            return 1;
        }
    }

    return 0;
}
/* Push a copy of a C object onto the Lua stack as userdata.
 *
 * A userdata block of typeSize bytes is created, given the metatable
 * registered under typeName, and filled with the bytes at p.
 *
 * Returns 1 on success, 0 if the userdata could not be created.
 */
int pushType(lua_State* luaSt, const char* typeName, size_t typeSize, void* p)
{
    void* udata = lua_newuserdata(luaSt, typeSize);

    if (udata == NULL)
    {
        mw_printf("Creating userdata '%s' failed\n", typeName);
        return 0;
    }

    /* The dSFMT intrinsics code depends on this alignment */
    assert((uintptr_t) udata % MW_LUA_ALIGN == 0);

    /* Attach the type's metatable to the new userdata */
    luaL_getmetatable(luaSt, typeName);
    lua_setmetatable(luaSt, -2);

#if 0
    /* Give this object a new function environment; for installing
     * arbitrary lua functions into a type */
    lua_newtable(luaSt);
    lua_setfenv(luaSt, -2);
#endif

    memcpy(udata, p, typeSize);

    return 1;
}
/* Precompute the per-stream constants used during evaluation.
 *
 * One StreamConstants entry is produced per stream: whether sigma is
 * outside +/- SIGMA_LIMIT, 1 / (2 sigma^2), and the stream's a and c
 * values.
 *
 * Returns the newly allocated array, or NULL (after freeing it) when
 * some stream has a sigma of exactly 0.
 */
StreamConstants* getStreamConstants(const AstronomyParameters* ap, const Streams* streams)
{
    StreamConstants* consts;
    real sigma;
    real twoSigmaSq;
    int idx;

    consts = (StreamConstants*) mwMallocA(streams->number_streams * sizeof(StreamConstants));

    for (idx = 0; idx < streams->number_streams; ++idx)
    {
        sigma = streams->parameters[idx].sigma;

        if (sigma == 0.0)
        {
            mw_printf("stream sigma 0.0 is invalid\n");
            mwFreeA(consts);
            return NULL;
        }

        consts[idx].large_sigma = (sigma > SIGMA_LIMIT || sigma < -SIGMA_LIMIT);

        twoSigmaSq = 2.0 * sqr(sigma);
        consts[idx].sigma_sq2_inv = 1.0 / twoSigmaSq;

        consts[idx].a = streamA(&streams->parameters[idx]);
        consts[idx].c = streamC(ap,
                                ap->wedge,
                                streams->parameters[idx].mu,
                                streams->parameters[idx].r);
    }

    return consts;
}
/*Functions for reading parameters from files */
real* fread_double_array(FILE* file, const char* array_name, unsigned int* sizeOut)
{
    unsigned int i, size;
    int rc;
    real* arr;

    fscanf(file, array_name);
    fscanf(file, "[%u]: ", &size);

    arr = (real*) mwMalloc(sizeof(real) * size);

    for (i = 0; i < size; i++)
    {
        rc = fscanf(file, READ_DOUBLE_ARRAY_READ_STR, &arr[i]);
        if (rc != 1)
        {
            mw_printf("Error reading into %s\n", array_name);
            free(arr);
            return NULL;
        }

        if (i < size - 1)
            fscanf(file, ", ");
    }
    fscanf(file, "\n");

    if (sizeOut)
        *sizeOut = size;

    return arr;
}
/* Produce the GPU-related preferences for this run.
 *
 * Every field starts at its compiled-in default. In a BOINC build the
 * project preferences are then read over the defaults, unless the app
 * init data cannot be read. Command line arguments are applied by the
 * caller afterwards and take precedence; they work without BOINC too.
 */
static void separationReadPreferences(SeparationPrefs* prefsOut)
{
    /* Static so the table below can hold the addresses of its fields */
    static SeparationPrefs prefs;
    static MWProjectPrefs sepPrefs[] =
        {
            { "gpu_target_frequency", MW_PREF_DOUBLE, FALSE, &prefs.gpuTargetFrequency   },
            { "gpu_wait_factor",      MW_PREF_DOUBLE, FALSE, &prefs.gpuWaitFactor        },
            { "gpu_non_responsive",   MW_PREF_BOOL,   FALSE, &prefs.gpuNonResponsive     },
            { "gpu_process_priority", MW_PREF_INT,    FALSE, &prefs.gpuProcessPriority   },
            { "no_gpu_checkpoint",    MW_PREF_BOOL,   FALSE, &prefs.gpuDisableCheckpoint },
            END_MW_PROJECT_PREFS
        };

    /* Defaults, used for anything the project preferences do not set */
    prefs.gpuTargetFrequency   = DEFAULT_TARGET_FREQUENCY;
    prefs.gpuWaitFactor        = DEFAULT_WAIT_FACTOR;
    prefs.gpuNonResponsive     = DEFAULT_NON_RESPONSIVE;
    prefs.gpuProcessPriority   = DEFAULT_GPU_PRIORITY;
    prefs.gpuDisableCheckpoint = DEFAULT_DISABLE_GPU_CHECKPOINTING;

    if (!BOINC_APPLICATION)
    {
        *prefsOut = prefs;
        return;
    }

    if (!mwGetAppInitData())
    {
        mwReadProjectPrefs(sepPrefs, mwGetProjectPrefs());
    }
    else
    {
        mw_printf("Error reading app init data. Project preferences will not be used\n");
    }

    *prefsOut = prefs;
}
/* Run every chunk of the integral kernel for a single nu step.
 *
 * The kernel arguments for nu_step are set first. The chunks are then
 * launched one after another inside a critical section, advancing the
 * global offset by the chunk's global size each time, until all
 * runSizes->nChunk chunks are done or one of them fails.
 *
 * Returns CL_SUCCESS or the first error encountered.
 */
static cl_int runNuStep(CLInfo* ci, const IntegralArea* ia, const RunSizes* runSizes, cl_uint nu_step)
{
    size_t offset[1] = { 0 };
    cl_uint chunk = 0;
    cl_int err;

    err = setNuKernelArgs(ia, nu_step);
    if (err != CL_SUCCESS)
    {
        mw_printf("Failed to set nu kernel argument\n");
        return err;
    }

    mw_begin_critical_section();

    while (chunk < runSizes->nChunk && err == CL_SUCCESS)
    {
        err = runIntegralKernel(ci, runSizes, offset);
        checkQuitRequest();         /* Kernel has finished by now */
        offset[0] += runSizes->global[0];
        ++chunk;
    }

    mw_end_critical_section();

    return err;
}
/* Print a short summary of a device: name, vendor and vendor id,
 * device type, driver and version strings, compute capability,
 * compute unit count, clock frequency, the global / local / constant
 * buffer memory sizes, and the double precision extension in use.
 *
 * LLU is spliced into the format as the conversion for the three
 * memory sizes - presumably the specifier matching their 64-bit type;
 * verify against its definition.
 */
void mwPrintDevInfoShort(const DevInfo* di)
{
    mw_printf("Device '%s' (%s:0x%x) (%s)\n"
              "Driver version:      %s\n"
              "Version:             %s\n"
              "Compute capability:  %u.%u\n"
              "Max compute units:   %u\n"
              "Clock frequency:     %u Mhz\n"
              "Global mem size:     "LLU"\n"
              "Local mem size:      "LLU"\n"
              "Max const buf size:  "LLU"\n"
              "Double extension:    %s\n",
              di->devName,
              di->vendor, di->vendorID,
              showCLDeviceType(di->devType),
              di->driver,
              di->version,
              di->computeCapabilityMajor, di->computeCapabilityMinor,
              di->maxCompUnits,
              di->clockFreq,
              di->memSize,
              di->localMemSize,
              di->maxConstBufSize,
              showMWDoubleExts(di->doubleExts)
        );
}
/* Read an array written as "<array_name>[<size>]: v0, v1, ..., vN\n".
 *
 * file:       stream positioned at the start of the array's name
 * array_name: label expected in the file; also used in error messages
 * sizeOut:    if non-NULL, receives the element count on success
 *
 * Returns a newly allocated array of `size` ints, or NULL if the size
 * or any element cannot be read.
 */
int* fread_int_array(FILE *file, const char *array_name, unsigned int* sizeOut)
{
    unsigned int i;
    unsigned int size = 0;
    int* arr;

    /* NOTE(review): the label is used directly as the scanf format, so
     * it must never contain a '%' conversion - confirm all callers pass
     * fixed names. */
    fscanf(file, array_name);

    /* Without a valid size there is nothing sensible to allocate or read */
    if (fscanf(file, "[%u]: ", &size) != 1)
    {
        mw_printf("Error reading size of %s\n", array_name);
        return NULL;
    }

    arr = (int*) mwMalloc(sizeof(int) * size);

    for (i = 0; i < size; i++)
    {
        if (fscanf(file, "%d", &arr[i]) != 1)
        {
            mw_printf("Error reading into %s\n", array_name);
            free(arr);
            return NULL;
        }

        /* Skip the separator between elements, but not after the last */
        if (i < size - 1)
            fscanf(file, ", ");
    }
    fscanf(file, "\n");

    if (sizeOut)
        *sizeOut = size;

    return arr;
}
/* Look up the Windows transactional file functions at run time.
 *
 * The transactional stuff is only available on Vista and later, so the
 * entry points are resolved dynamically instead of being linked.
 * transactionFuncsInit is set as soon as the attempt is made;
 * transactionFuncsOK is set to whether all three functions were found.
 */
static void initW32TransactionalFunctions(void)
{
    HMODULE ktm32Lib;
    HMODULE kernel32Lib;

    transactionFuncsInit = TRUE;

    kernel32Lib = LoadLibrary("Kernel32.dll");
    if (!kernel32Lib)
    {
        mwPerrorW32("Could not load Kernel32.dll");
        return;
    }

    ktm32Lib = LoadLibrary("KtmW32.dll");
    if (!ktm32Lib)
    {
        /* Report first so the release below cannot disturb the error state */
        mwPerrorW32("Could not load KtmW32.dll");
        FreeLibrary(kernel32Lib);
        return;
    }

    __CreateTransaction = GetProcAddress(ktm32Lib, "CreateTransaction");
    __CommitTransaction = GetProcAddress(ktm32Lib, "CommitTransaction");
    __MoveFileTransacted = GetProcAddress(kernel32Lib, "MoveFileTransactedA");

    transactionFuncsOK = (__CreateTransaction && __MoveFileTransacted && __CommitTransaction);

    if (!transactionFuncsOK)
    {
        mw_printf("Failed to get transaction functions\n");
    }
}
/* Advance the simulation from st->step to ctx->nStep.
 *
 * Statuses from each stage are OR-ed into one accumulated value, and
 * the run stops as soon as that value is fatal. After the last step a
 * final checkpoint is written for BOINC builds or when
 * ctx->checkpointT is non-negative.
 *
 * Returns the accumulated status, or NBODY_CHECKPOINT_ERROR if the
 * final checkpoint cannot be written.
 */
NBodyStatus nbRunSystemPlain(const NBodyCtx* ctx, NBodyState* st)
{
    NBodyStatus status = NBODY_SUCCESS;

    /* Accelerations are needed before the first step of this episode */
    status |= nbGravMap(ctx, st);
    if (nbStatusIsFatal(status))
    {
        return status;
    }

    while (st->step < ctx->nStep)
    {
        nbAddTracePoint(ctx, st);
        nbUpdateDisplayedBodies(ctx, st);

        /* advance N-body system */
        status |= nbStepSystemPlain(ctx, st);
        if (nbStatusIsFatal(status))
        {
            return status;
        }

        status |= nbCheckpoint(ctx, st);
        if (nbStatusIsFatal(status))
        {
            return status;
        }

        nbReportProgress(ctx, st);
    }

    if (BOINC_APPLICATION || ctx->checkpointT >= 0)
    {
        mw_report("Making final checkpoint\n");
        if (nbWriteCheckpoint(ctx, st))
        {
            mw_printf("Failed to write final checkpoint\n");
            return NBODY_CHECKPOINT_ERROR;
        }
    }

    return status;
}
/* Pick the requested device out of the list of devices found.
 *
 * devs holds nDev devices and clr->devNum is the index wanted. The
 * chosen device is stored in ci->dev and its type in ci->devType.
 *
 * Returns MW_CL_ERROR when the index is out of range, otherwise the
 * result of querying the device type.
 */
cl_int mwSelectDevice(CLInfo* ci, const cl_device_id* devs, const CLRequest* clr, const cl_uint nDev)
{
    cl_int status;

    if (nDev <= clr->devNum)
    {
        mw_printf("Requested device is out of range of number found devices\n");
        return MW_CL_ERROR;
    }

    ci->dev = devs[clr->devNum];

    status = mwGetDeviceType(ci->dev, &ci->devType);
    if (status != CL_SUCCESS)
    {
        mw_printf("Failed to find type of device %u\n", clr->devNum);
    }

    return status;
}
/* Error callback for an OpenCL context: prints the error text.
 * The remaining arguments are not used.
 */
static void CL_CALLBACK contextCallback(const char* errInfo,
                                        const void* privateInfo,
                                        size_t cb,
                                        void* userData)
{
    (void) privateInfo;
    (void) cb;
    (void) userData;

    mw_printf("CL context error: %s\n", errInfo);
}
/* Prepare the OpenCL side of an N-body state.
 *
 * The bodies must already be in st. A custom Lua potential is not
 * supported here. The CL bookkeeping structures are allocated, the
 * device is set up and checked, work sizes are chosen, and then the
 * kernels, buffers, tree status, kernel arguments and bodies are put
 * in place, in that order.
 *
 * Returns NBODY_SUCCESS, or the status matching the stage that failed.
 */
NBodyStatus nbInitNBodyStateCL(NBodyState* st, const NBodyCtx* ctx, const CLRequest* clr)
{
    /* Bodies must be set before trying to use this */
    if (!st->bodytab)
    {
        return NBODY_CONSISTENCY_ERROR;
    }

    if (ctx->potentialType == EXTERNAL_POTENTIAL_CUSTOM_LUA)
    {
        mw_printf("Cannot use Lua potential with OpenCL\n");
        return NBODY_UNSUPPORTED;
    }

    st->usesQuad = ctx->useQuad;
    st->usesExact = (ctx->criterion == Exact);
    st->usesCL = TRUE;

    st->ci = mwCalloc(1, sizeof(CLInfo));
    st->nbb = mwCalloc(1, sizeof(NBodyBuffers));
    st->workSizes = mwCalloc(1, sizeof(NBodyWorkSizes));
    st->kernels = mwCalloc(1, sizeof(NBodyKernels));

    if (mwSetupCL(st->ci, clr) != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    if (!nbCheckDevCapabilities(&st->ci->di, ctx, st->nbody))
    {
        return NBODY_CAPABILITY_ERROR;
    }

    if (nbSetThreadCounts(st->workSizes, &st->ci->di, ctx))
    {
        return NBODY_ERROR;
    }

    if (nbSetWorkSizes(st->workSizes, &st->ci->di))
    {
        return NBODY_ERROR;
    }

    st->effNBody = nbFindEffectiveNBody(st->workSizes, st->usesExact, st->nbody);

    if (nbLoadKernels(ctx, st))
    {
        return NBODY_CL_ERROR;
    }

    if (nbCreateBuffers(ctx, st) != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    if (nbSetInitialTreeStatus(st) != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    if (nbSetAllKernelArguments(st) != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    if (nbMarshalBodies(st, CL_TRUE) != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    return NBODY_SUCCESS;
}
/* Turn off denormal handling in the SSE control register.
 *
 * Sets bits 0x8040 in MXCSR (flush-to-zero and denormals-are-zero)
 * and returns the previous register value.
 */
int mwDisableDenormalsSSE(void)
{
    const int savedMXCSR = _mm_getcsr();

    _mm_setcsr(savedMXCSR | 0x8040);
    mw_printf("Disabled denormals\n");

    return savedMXCSR;
}
/* Resolve CHECKPOINT_FILE into resolvedCheckpointPath.
 * Returns the result of mw_resolve_filename(); a non-zero result is
 * also reported.
 */
int resolveCheckpoint(void)
{
    const int rc = mw_resolve_filename(CHECKPOINT_FILE,
                                       resolvedCheckpointPath,
                                       sizeof(resolvedCheckpointPath));

    if (rc != 0)
    {
        mw_printf("Error resolving checkpoint file '%s': %d\n", CHECKPOINT_FILE, rc);
    }

    return rc;
}
示例#30
0
/* Print each entry of a flow matrix, one per line.
 * A NULL flow is reported as empty.
 *
 * NOTE(review): the element printed for (row, col) is flow[col + row],
 * so distinct cells can map to the same element. This looks like it
 * was meant to use a row stride - confirm the matrix layout.
 */
static void emdPrintFlowMatrix(const float* flow, int size1, int size2)
{
    int row, col;

    if (flow == NULL)
    {
        mw_printf("Empty flow\n");
        return;
    }

    for (row = 0; row < size1; ++row)
    {
        for (col = 0; col < size2; ++col)
        {
            mw_printf("Flow[ % d][ % d] = % f\n", row, col, flow[col + row]);
        }
    }
}